@@ -137,6 +137,7 @@ def test_dataset_load_with_multi_nonpartition_filters_success(fixed_local_datase
137137 assert fixed_local_dataset .row_count == 1
138138
139139
140+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
140141def test_dataset_load_current_records_all_sources_success (dataset_with_runs_location ):
141142 timdex_dataset = TIMDEXDataset (dataset_with_runs_location )
142143
@@ -149,6 +150,7 @@ def test_dataset_load_current_records_all_sources_success(dataset_with_runs_loca
149150 assert len (timdex_dataset .dataset .files ) == 12
150151
151152
153+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
152154def test_dataset_load_current_records_one_source_success (dataset_with_runs_location ):
153155 timdex_dataset = TIMDEXDataset (dataset_with_runs_location )
154156 timdex_dataset .load (current_records = True , source = "alma" )
@@ -346,9 +348,9 @@ def test_dataset_local_dataset_row_count_missing_dataset_raise_error(local_datas
346348 _ = td .row_count
347349
348350
349- def test_dataset_all_records_not_current_and_not_deduped (local_dataset_with_runs ):
350- local_dataset_with_runs .load ()
351- all_records_df = local_dataset_with_runs .read_dataframe ()
351+ def test_dataset_all_records_not_current_and_not_deduped (dataset_with_runs ):
352+ dataset_with_runs .load ()
353+ all_records_df = dataset_with_runs .read_dataframe ()
352354
353355 # assert counts reflect all records from dataset, no deduping
354356 assert all_records_df .source .value_counts ().to_dict () == {"alma" : 254 , "dspace" : 194 }
@@ -358,9 +360,10 @@ def test_dataset_all_records_not_current_and_not_deduped(local_dataset_with_runs
358360 assert all_records_df .run_date .max () == date (2025 , 2 , 5 )
359361
360362
361- def test_dataset_all_current_records_deduped (local_dataset_with_runs ):
362- local_dataset_with_runs .load (current_records = True )
363- all_records_df = local_dataset_with_runs .read_dataframe ()
363+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
364+ def test_dataset_all_current_records_deduped (dataset_with_runs ):
365+ dataset_with_runs .load (current_records = True )
366+ all_records_df = dataset_with_runs .read_dataframe ()
364367
365368 # assert both sources have accurate record counts for current records only
366369 assert all_records_df .source .value_counts ().to_dict () == {"dspace" : 90 , "alma" : 100 }
@@ -373,9 +376,10 @@ def test_dataset_all_current_records_deduped(local_dataset_with_runs):
373376 assert all_records_df .run_date .max () == date (2025 , 2 , 5 ) # dspace
374377
375378
376- def test_dataset_source_current_records_deduped (local_dataset_with_runs ):
377- local_dataset_with_runs .load (current_records = True , source = "alma" )
378- alma_records_df = local_dataset_with_runs .read_dataframe ()
379+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
380+ def test_dataset_source_current_records_deduped (dataset_with_runs ):
381+ dataset_with_runs .load (current_records = True , source = "alma" )
382+ alma_records_df = dataset_with_runs .read_dataframe ()
379383
380384 # assert only alma records present and correct count
381385 assert alma_records_df .source .value_counts ().to_dict () == {"alma" : 100 }
@@ -388,36 +392,40 @@ def test_dataset_source_current_records_deduped(local_dataset_with_runs):
388392 assert alma_records_df .run_date .max () == date (2025 , 1 , 5 )
389393
390394
395+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
391396def test_dataset_all_read_methods_get_deduplication (
392- local_dataset_with_runs ,
397+ dataset_with_runs ,
393398):
394- local_dataset_with_runs .load (current_records = True , source = "alma" )
399+ dataset_with_runs .load (current_records = True , source = "alma" )
395400
396- full_df = local_dataset_with_runs .read_dataframe ()
397- all_records = list (local_dataset_with_runs .read_dicts_iter ())
398- transformed_records = list (local_dataset_with_runs .read_transformed_records_iter ())
401+ full_df = dataset_with_runs .read_dataframe ()
402+ all_records = list (dataset_with_runs .read_dicts_iter ())
403+ transformed_records = list (dataset_with_runs .read_transformed_records_iter ())
399404
400405 assert len (full_df ) == len (all_records ) == len (transformed_records )
401406
402407
408+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
403409def test_dataset_current_records_no_additional_filtering_accurate_records_yielded (
404- local_dataset_with_runs ,
410+ dataset_with_runs ,
405411):
406- local_dataset_with_runs .load (current_records = True , source = "alma" )
407- df = local_dataset_with_runs .read_dataframe ()
412+ dataset_with_runs .load (current_records = True , source = "alma" )
413+ df = dataset_with_runs .read_dataframe ()
408414 assert df .action .value_counts ().to_dict () == {"index" : 99 , "delete" : 1 }
409415
410416
417+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
411418def test_dataset_current_records_action_filtering_accurate_records_yielded (
412- local_dataset_with_runs ,
419+ dataset_with_runs ,
413420):
414- local_dataset_with_runs .load (current_records = True , source = "alma" )
415- df = local_dataset_with_runs .read_dataframe (action = "index" )
421+ dataset_with_runs .load (current_records = True , source = "alma" )
422+ df = dataset_with_runs .read_dataframe (action = "index" )
416423 assert df .action .value_counts ().to_dict () == {"index" : 99 }
417424
418425
426+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
419427def test_dataset_current_records_index_filtering_accurate_records_yielded (
420- local_dataset_with_runs ,
428+ dataset_with_runs ,
421429):
422430 """This is a somewhat complex test, but demonstrates that only 'current' records
423431 are yielded when .load(current_records=True) is applied.
@@ -437,14 +445,14 @@ def test_dataset_current_records_index_filtering_accurate_records_yielded(
437445 "influenced" what records we would see as we continue backwards in time.
438446 """
439447 # with current_records=False, we get all 25 records from run-5
440- local_dataset_with_runs .load (current_records = False , source = "alma" )
441- df = local_dataset_with_runs .read_dataframe (run_id = "run-5" )
448+ dataset_with_runs .load (current_records = False , source = "alma" )
449+ df = dataset_with_runs .read_dataframe (run_id = "run-5" )
442450 assert len (df ) == 25
443451
444452 # with current_records=True, we only get 15 records from run-5
445453 # because newer run-6 influenced what records are current for older run-5
446- local_dataset_with_runs .load (current_records = True , source = "alma" )
447- df = local_dataset_with_runs .read_dataframe (run_id = "run-5" )
454+ dataset_with_runs .load (current_records = True , source = "alma" )
455+ df = dataset_with_runs .read_dataframe (run_id = "run-5" )
448456 assert len (df ) == 15
449457 assert list (df .timdex_record_id ) == [
450458 "alma:10" ,
@@ -465,6 +473,7 @@ def test_dataset_current_records_index_filtering_accurate_records_yielded(
465473 ]
466474
467475
476+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
468477def test_dataset_load_current_records_gets_correct_same_day_full_run (
469478 dataset_with_same_day_runs ,
470479):
@@ -477,6 +486,7 @@ def test_dataset_load_current_records_gets_correct_same_day_full_run(
477486 assert list (df .run_id .unique ()) == ["run-2" ]
478487
479488
489+ @pytest .mark .skip (reason = "All tests for 'current' records will be reworked." )
480490def test_dataset_load_current_records_gets_correct_same_day_daily_runs_ordering (
481491 dataset_with_same_day_runs ,
482492):
0 commit comments