@@ -175,3 +175,54 @@ def test_dataset_write_schema_applied_to_dataset(new_dataset, sample_records_ite
175175 )
176176
177177 assert set (dataset .schema .names ) == set (TIMDEX_DATASET_SCHEMA .names )
178+
179+
def test_dataset_write_partition_deleted_when_written_to_again(
    new_dataset, sample_records_iter
):
    """Test existing_data_behavior="delete_matching" when writing to a dataset.

    Three writes are performed:

    1. a first write to run_date="2024-12-01",
    2. an unrelated write to run_date="2024-12-15",
    3. a second write to run_date="2024-12-01" using the same partition values
       as the first.

    The second write to run_date="2024-12-01" is expected to remove the files
    from the first write, while the run_date="2024-12-15" files are untouched.
    """
    # row counts per write; the expected total below is derived from these so
    # the explanatory comments cannot drift from the asserted value
    first_partition_row_count = 10
    other_partition_row_count = 7

    partition_values = {
        "source": "alma",
        "run_date": "2024-12-01",
        "run_type": "daily",
        "action": "index",
        "run_id": "000-111-aaa-bbb",
    }

    # perform FIRST write to run_date="2024-12-01"
    written_files_1 = new_dataset.write(
        sample_records_iter(first_partition_row_count),
        partition_values=partition_values,
    )

    # assert that files from first write are present at this time
    assert written_files_1
    assert all(os.path.exists(file.path) for file in written_files_1)

    # perform unrelated write with new run_date to confirm this is untouched
    # during delete
    new_partition_values = partition_values.copy()
    new_partition_values["run_date"] = "2024-12-15"
    new_partition_values["run_id"] = "222-333-ccc-ddd"
    written_files_x = new_dataset.write(
        sample_records_iter(other_partition_row_count),
        partition_values=new_partition_values,
    )

    # perform SECOND write to run_date="2024-12-01", expecting this to delete
    # everything under this combination of partitions (i.e. the first write)
    written_files_2 = new_dataset.write(
        sample_records_iter(first_partition_row_count),
        partition_values=partition_values,
    )

    new_dataset.reload()

    # assert 17 rows: second write for run_date="2024-12-01" @ 10 rows +
    # run_date="2024-12-15" @ 7 rows (the first write's 10 rows were deleted)
    assert new_dataset.row_count == (
        first_partition_row_count + other_partition_row_count
    )

    # assert that ALL files from first run_date="2024-12-01" write are gone
    assert not any(os.path.exists(file.path) for file in written_files_1)

    # assert that files from the second run_date="2024-12-01" write and from
    # the run_date="2024-12-15" write exist; guard against empty lists so the
    # all() checks cannot pass vacuously
    assert written_files_2
    assert all(os.path.exists(file.path) for file in written_files_2)
    assert written_files_x
    assert all(os.path.exists(file.path) for file in written_files_x)
0 commit comments