Skip to content

Commit f7fb7eb

Browse files
fix: add insert-by-name support to microbatch and replace_where strategies (#1338) (#1348)
resolves #1338 ### description Added `INSERT BY NAME` support to the `get_replace_where_sql` macro used by both the `microbatch` and `replace_where` incremental strategies. Previously, these strategies used the positional `INSERT INTO ... TABLE` syntax, which would silently produce wrong data (or error) when the column order changed between the temp and target tables. This fix follows the identical pattern already established in the `append`, `insert_overwrite`, and `delete+insert` strategies: checking `adapter.has_dbr_capability('insert_by_name')` and appending `BY NAME` when the runtime supports it (DBR 12.2+). **before:** ```sql INSERT INTO target_table REPLACE WHERE event_time >= '2023-01-01' and event_time < '2023-01-02' TABLE temp_table ``` **after (DBR 12.2+):** ```sql INSERT INTO target_table REPLACE WHERE event_time >= '2023-01-01' and event_time < '2023-01-02' BY NAME TABLE temp_table ``` Key notes: - Older DBR versions are unaffected (`BY NAME` is only emitted when the capability is detected). - Backward compatible: `BY NAME` is only emitted when DBR 12.2+ is detected via `adapter.has_dbr_capability('insert_by_name')`. - Both strategies are fixed: since `microbatch` delegates to `get_replace_where_sql`, fixing the shared macro fixes both. ### tests All tests pass across all incremental macro tests. ```bash python -m pytest tests/unit/macros/materializations/incremental/ -v ``` ### Checklist - [x] I have run this code in development and it appears to resolve the stated issue - [x] This PR includes tests, or tests are not required/relevant for this PR - [x] I have updated the CHANGELOG.md and added information about my change to the "dbt-databricks next" section --------- Signed-off-by: aarushisingh04 <aarushi07.singh@gmail.com> Co-authored-by: Shubham Dhal <shubham.dhal@databricks.com>
1 parent 65b24ca commit f7fb7eb

6 files changed

Lines changed: 224 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## dbt-databricks 1.11.7 (TBD)
2+
3+
### Fixes
4+
5+
- Fix column order mismatch in microbatch and replace_where incremental strategies by using INSERT BY NAME syntax ([#1338](https://github.com/databricks/dbt-databricks/issues/1338))
6+
17
## dbt-databricks 1.11.6 (Mar 10, 2026)
28

39
### Features

dbt/include/databricks/macros/materializations/incremental/strategies.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,9 @@
121121
{%- set predicates = args_dict['incremental_predicates'] -%}
122122
{%- set target_relation = args_dict['target_relation'] -%}
123123
{%- set temp_relation = args_dict['temp_relation'] -%}
124+
{%- set has_insert_by_name = adapter.has_dbr_capability('insert_by_name') -%}
124125
INSERT INTO {{ target_relation.render() }}
126+
{%- if has_insert_by_name %} BY NAME{% endif %}
125127
{%- if predicates %}
126128
{%- if predicates is sequence and predicates is not string %}
127129
REPLACE WHERE {{ predicates | join(' and ') }}

tests/functional/adapter/microbatch/fixtures.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,45 @@
1414
- name: event_time
1515
description: "Timestamp of the event"
1616
"""
17+
18+
microbatch_seeds_csv = """
19+
id,event_time,amount
20+
1,2023-01-01,100
21+
2,2023-01-01,200
22+
3,2023-01-02,300
23+
""".strip()
24+
25+
# Initial model: columns in (id, event_time, amount) order
26+
microbatch_model_sql = """
27+
{{ config(
28+
materialized='incremental',
29+
incremental_strategy='microbatch',
30+
event_time='event_time',
31+
begin='2023-01-01',
32+
batch_size='day'
33+
) }}
34+
select id, event_time, amount from {{ ref('microbatch_seeds') }}
35+
"""
36+
37+
# Reordered model: columns in (amount, id, event_time) order — this is the key scenario
38+
# Without BY NAME, positional INSERT would silently corrupt data here
39+
microbatch_model_reordered_sql = """
40+
{{ config(
41+
materialized='incremental',
42+
incremental_strategy='microbatch',
43+
event_time='event_time',
44+
begin='2023-01-01',
45+
batch_size='day'
46+
) }}
47+
select amount, id, event_time from {{ ref('microbatch_seeds') }}
48+
"""
49+
50+
schema_yml = """
51+
version: 2
52+
models:
53+
- name: microbatch_model
54+
columns:
55+
- name: id
56+
- name: event_time
57+
- name: amount
58+
"""
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import pytest
2+
from dbt.tests import util
3+
4+
from tests.functional.adapter.microbatch.fixtures import (
5+
microbatch_seeds_csv,
6+
schema_yml,
7+
)
8+
9+
# Uses replace_where strategy (same macro as microbatch) to verify that
10+
# column reordering between runs does not corrupt data.
11+
_initial_model_sql = """
12+
{{ config(
13+
materialized='incremental',
14+
incremental_strategy='replace_where',
15+
incremental_predicates="id >= 2",
16+
) }}
17+
18+
{% if not is_incremental() %}
19+
select id, event_time, amount from {{ ref('microbatch_seeds') }}
20+
{% else %}
21+
select id, event_time, amount from {{ ref('microbatch_seeds') }} where id >= 2
22+
{% endif %}
23+
"""
24+
25+
# Same logic but columns in a different order (amount, id, event_time)
26+
_reordered_model_sql = """
27+
{{ config(
28+
materialized='incremental',
29+
incremental_strategy='replace_where',
30+
incremental_predicates="id >= 2",
31+
) }}
32+
33+
{% if not is_incremental() %}
34+
select amount, id, event_time from {{ ref('microbatch_seeds') }}
35+
{% else %}
36+
select amount, id, event_time from {{ ref('microbatch_seeds') }} where id >= 2
37+
{% endif %}
38+
"""
39+
40+
41+
class TestReplaceWhereColumnOrder:
42+
"""
43+
Verifies that the replace_where strategy (shared with microbatch) preserves
44+
data integrity when column order changes between the temp and target tables.
45+
46+
The fix uses INSERT BY NAME so that columns are matched by name, not position.
47+
"""
48+
49+
@pytest.fixture(scope="class")
50+
def models(self):
51+
return {
52+
"replace_where_col_order.sql": _initial_model_sql,
53+
"schema.yml": schema_yml,
54+
}
55+
56+
@pytest.fixture(scope="class")
57+
def seeds(self):
58+
return {
59+
"microbatch_seeds.csv": microbatch_seeds_csv,
60+
}
61+
62+
def test_replace_where_column_order(self, project):
63+
# Seed and initial run — creates the table with original column order
64+
util.run_dbt(["seed"])
65+
util.run_dbt(["run"])
66+
67+
# Verify initial data
68+
actual_initial = project.run_sql(
69+
f"select id, amount from {project.database}.{project.test_schema}"
70+
".replace_where_col_order order by id",
71+
fetch="all",
72+
)
73+
assert len(actual_initial) == 3
74+
assert actual_initial[0] == (1, 100)
75+
assert actual_initial[1] == (2, 200)
76+
assert actual_initial[2] == (3, 300)
77+
78+
# Swap to the reordered model (amount, id, event_time)
79+
util.write_file(
80+
_reordered_model_sql,
81+
str(project.project_root) + "/models/replace_where_col_order.sql",
82+
)
83+
84+
# Incremental run with reordered columns
85+
util.run_dbt(["run"])
86+
87+
# Verify data integrity — columns must match by name, not position
88+
actual_final = project.run_sql(
89+
f"select id, amount from {project.database}.{project.test_schema}"
90+
".replace_where_col_order order by id",
91+
fetch="all",
92+
)
93+
94+
assert len(actual_final) == 3
95+
# id=1 was not replaced (predicate is id >= 2), so it stays the same
96+
assert actual_final[0] == (1, 100)
97+
# These rows were replaced — if column order was wrong, amount and id
98+
# would be swapped (e.g., id=200 amount=2 instead of id=2 amount=200)
99+
assert actual_final[1] == (2, 200)
100+
assert actual_final[2] == (3, 300)

tests/unit/macros/materializations/incremental/test_microbatch_macros.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ def template_name(self) -> str:
1414
def macro_folders_to_load(self) -> list:
1515
return ["macros", "macros/materializations/incremental"]
1616

17+
@pytest.fixture(autouse=True)
18+
def setup_mock_capability(self, context):
19+
"""Mock the adapter's has_dbr_capability to support insert_by_name by default"""
20+
21+
def has_dbr_capability_side_effect(capability_name):
22+
if capability_name == "insert_by_name":
23+
return True # Default to DBR 12.2+
24+
return False
25+
26+
context["adapter"].has_dbr_capability = Mock(side_effect=has_dbr_capability_side_effect)
27+
1728
@pytest.fixture
1829
def mock_temp(self):
1930
relation = Mock()
@@ -50,8 +61,9 @@ def test_databricks__get_incremental_microbatch_sql_with_start_and_end(
5061

5162
expected = """
5263
insert into `some_database`.`some_schema`.`some_table`
53-
replace where cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00'
54-
and cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00' table `temp_table`
64+
by name replace where cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00'
65+
and cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00'
66+
table `temp_table`
5567
"""
5668

5769
self.assert_sql_equal(result, expected)
@@ -75,7 +87,8 @@ def test_databricks__get_incremental_microbatch_sql_with_start_only(
7587

7688
expected = """
7789
insert into `some_database`.`some_schema`.`some_table`
78-
replace where cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00' table `temp_table`
90+
by name replace where cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00'
91+
table `temp_table`
7992
"""
8093

8194
self.assert_sql_equal(result, expected)
@@ -99,7 +112,8 @@ def test_databricks__get_incremental_microbatch_sql_with_end_only(
99112

100113
expected = """
101114
insert into `some_database`.`some_schema`.`some_table`
102-
replace where cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00' table `temp_table`
115+
by name replace where cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00'
116+
table `temp_table`
103117
"""
104118

105119
self.assert_sql_equal(result, expected)
@@ -125,9 +139,10 @@ def test_databricks__get_incremental_microbatch_sql_with_existing_predicates(
125139

126140
expected = """
127141
insert into `some_database`.`some_schema`.`some_table`
128-
replace where col1 = 'value'
142+
by name replace where col1 = 'value'
129143
and col2 > 100 and cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00'
130-
and cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00' table `temp_table`
144+
and cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00'
145+
table `temp_table`
131146
"""
132147

133148
self.assert_sql_equal(result, expected)
@@ -151,7 +166,37 @@ def test_databricks__get_incremental_microbatch_sql_without_start_or_end(
151166

152167
expected = """
153168
insert into `some_database`.`some_schema`.`some_table`
154-
table `temp_table`
169+
by name table `temp_table`
170+
"""
171+
172+
self.assert_sql_equal(result, expected)
173+
174+
def test_databricks__get_incremental_microbatch_sql_without_insert_by_name(
175+
self, template_bundle, context, config, mock_arg_dict
176+
):
177+
"""Test that BY NAME is omitted when the DBR version doesn't support insert_by_name"""
178+
179+
context["adapter"].has_dbr_capability = Mock(return_value=False)
180+
181+
context["model"] = {"config": {"event_time": "event_timestamp"}}
182+
config["__dbt_internal_microbatch_event_time_start"] = "2023-01-01 00:00:00"
183+
config["__dbt_internal_microbatch_event_time_end"] = "2023-01-02 00:00:00"
184+
185+
context["return"] = Mock()
186+
187+
self.run_macro_raw(
188+
template_bundle.template,
189+
"databricks__get_incremental_microbatch_sql",
190+
mock_arg_dict,
191+
)
192+
193+
result = context["return"].call_args[0][0]
194+
195+
expected = """
196+
insert into `some_database`.`some_schema`.`some_table`
197+
replace where cast(event_timestamp as TIMESTAMP) >= '2023-01-01 00:00:00'
198+
and cast(event_timestamp as TIMESTAMP) < '2023-01-02 00:00:00'
199+
table `temp_table`
155200
"""
156201

157202
self.assert_sql_equal(result, expected)

tests/unit/macros/materializations/incremental/test_replace_where_macros.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ def template_name(self) -> str:
1414
def macro_folders_to_load(self) -> list:
1515
return ["macros", "macros/materializations/incremental"]
1616

17+
@pytest.fixture(autouse=True)
18+
def setup_mock_capability(self, context):
19+
"""Mock the adapter's has_dbr_capability to support insert_by_name by default"""
20+
21+
def has_dbr_capability_side_effect(capability_name):
22+
if capability_name == "insert_by_name":
23+
return True # Default to DBR 12.2+
24+
return False
25+
26+
context["adapter"].has_dbr_capability = Mock(side_effect=has_dbr_capability_side_effect)
27+
1728
@pytest.fixture
1829
def mock_relations(self):
1930
target_relation = Mock()
@@ -37,7 +48,7 @@ def test_get_replace_where_sql_with_string_predicate(self, template_bundle, mock
3748

3849
expected = """
3950
INSERT INTO schema.target_table
40-
REPLACE WHERE date_col > '2023-01-01'
51+
BY NAME REPLACE WHERE date_col > '2023-01-01'
4152
TABLE schema.temp_table
4253
"""
4354

@@ -59,7 +70,7 @@ def test_get_replace_where_sql_with_predicate_list(self, template_bundle, mock_r
5970

6071
expected = """
6172
INSERT INTO schema.target_table
62-
REPLACE WHERE date_col > '2023-01-01' and another_col != 'value'
73+
BY NAME REPLACE WHERE date_col > '2023-01-01' and another_col != 'value'
6374
TABLE schema.temp_table
6475
"""
6576

@@ -78,7 +89,7 @@ def test_get_replace_where_sql_without_predicates(self, template_bundle, mock_re
7889

7990
expected = """
8091
INSERT INTO schema.target_table
81-
TABLE schema.temp_table
92+
BY NAME TABLE schema.temp_table
8293
"""
8394

8495
self.assert_sql_equal(result, expected)
@@ -96,7 +107,7 @@ def test_get_replace_where_sql_with_empty_predicate_list(self, template_bundle,
96107

97108
expected = """
98109
INSERT INTO schema.target_table
99-
TABLE schema.temp_table
110+
BY NAME TABLE schema.temp_table
100111
"""
101112

102113
self.assert_sql_equal(result, expected)
@@ -118,14 +129,18 @@ def test_get_replace_where_sql_with_complex_predicates(self, template_bundle, mo
118129

119130
expected = """
120131
INSERT INTO schema.target_table
121-
REPLACE WHERE date_col BETWEEN '2023-01-01' AND '2023-01-31' and status IN ('active', 'pending') and amount > 1000
132+
BY NAME REPLACE WHERE date_col BETWEEN '2023-01-01' AND '2023-01-31' and status IN ('active', 'pending') and amount > 1000
122133
TABLE schema.temp_table
123134
""" # noqa
124135

125136
self.assert_sql_equal(result, expected)
126137

127-
def test_get_replace_where_sql__by_name_with_predicates(self, template_bundle, mock_relations):
128-
"""Test that get_replace_where_sql does not use BY NAME (removed as of recent change)"""
138+
def test_get_replace_where_sql__without_insert_by_name(
139+
self, template_bundle, context, mock_relations
140+
):
141+
"""Test that BY NAME is omitted when the DBR version doesn't support insert_by_name"""
142+
context["adapter"].has_dbr_capability = Mock(return_value=False)
143+
129144
target_relation, temp_relation = mock_relations
130145

131146
args_dict = {

0 commit comments

Comments
 (0)