Skip to content

Commit 99e20ec

Browse files
Commit message: rename test fixtures
1 parent c9eb7bc commit 99e20ec

2 files changed

Lines changed: 34 additions & 38 deletions

File tree

tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ def local_dataset(local_dataset_location):
2929

3030

3131
@pytest.fixture
32-
def new_temp_dataset(tmp_path) -> TIMDEXDataset:
32+
def new_dataset(tmp_path) -> TIMDEXDataset:
3333
location = str(tmp_path / "new_dataset")
3434
return TIMDEXDataset(location=location)
3535

3636

3737
@pytest.fixture
38-
def small_records_iter():
38+
def sample_records_iter():
3939
"""Simulates an iterator of X number of valid DatasetRecord instances."""
4040

4141
def _records_iter(num_records):
@@ -45,7 +45,7 @@ def _records_iter(num_records):
4545

4646

4747
@pytest.fixture
48-
def small_records_iter_without_partitions():
48+
def sample_records_iter_without_partitions():
4949
"""Simulates an iterator of X number of DatasetRecord instances WITHOUT partition
5050
values included."""
5151

tests/test_dataset_write.py

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -59,70 +59,66 @@ def test_dataset_record_serialization_with_partition_values_provided():
5959
}
6060

6161

62-
def test_dataset_write_records_to_new_dataset(new_temp_dataset, small_records_iter):
63-
files_written = new_temp_dataset.write(small_records_iter(10_000))
62+
def test_dataset_write_records_to_new_dataset(new_dataset, sample_records_iter):
63+
files_written = new_dataset.write(sample_records_iter(10_000))
6464
assert len(files_written) == 1
65-
assert os.path.exists(new_temp_dataset.location)
65+
assert os.path.exists(new_dataset.location)
6666

6767
# load newly created dataset as new TIMDEXDataset instance
68-
dataset = TIMDEXDataset.load(new_temp_dataset.location)
68+
dataset = TIMDEXDataset.load(new_dataset.location)
6969
assert dataset.row_count == 10_000
7070

7171

72-
def test_dataset_reload_after_write(new_temp_dataset, small_records_iter):
73-
files_written = new_temp_dataset.write(small_records_iter(10_000))
72+
def test_dataset_reload_after_write(new_dataset, sample_records_iter):
73+
files_written = new_dataset.write(sample_records_iter(10_000))
7474
assert len(files_written) == 1
75-
assert os.path.exists(new_temp_dataset.location)
75+
assert os.path.exists(new_dataset.location)
7676

7777
# attempt row count before reload
7878
with pytest.raises(DatasetNotLoadedError):
79-
_ = new_temp_dataset.row_count
79+
_ = new_dataset.row_count
8080

8181
# attempt row count after reload
82-
new_temp_dataset.reload()
83-
assert new_temp_dataset.row_count == 10_000
82+
new_dataset.reload()
83+
assert new_dataset.row_count == 10_000
8484

8585

86-
def test_dataset_write_default_max_rows_per_file(new_temp_dataset, small_records_iter):
86+
def test_dataset_write_default_max_rows_per_file(new_dataset, sample_records_iter):
8787
"""Default is 100k rows per file, therefore writing 200,033 records should result in
8888
3 files (x2 @ 100k rows, x1 @ 33 rows)."""
8989
total_records = 200_033
9090

91-
new_temp_dataset.write(small_records_iter(total_records))
92-
new_temp_dataset.reload()
91+
new_dataset.write(sample_records_iter(total_records))
92+
new_dataset.reload()
9393

94-
assert new_temp_dataset.row_count == total_records
95-
assert len(new_temp_dataset.dataset.files) == math.ceil(
96-
total_records / MAX_ROWS_PER_FILE
97-
)
94+
assert new_dataset.row_count == total_records
95+
assert len(new_dataset.dataset.files) == math.ceil(total_records / MAX_ROWS_PER_FILE)
9896

9997

100-
def test_dataset_write_record_batches_uses_batch_size(
101-
new_temp_dataset, small_records_iter
102-
):
98+
def test_dataset_write_record_batches_uses_batch_size(new_dataset, sample_records_iter):
10399
total_records = 101
104100
batch_size = 50
105101
batches = list(
106-
new_temp_dataset.get_dataset_record_batches(
107-
small_records_iter(total_records), batch_size=batch_size
102+
new_dataset.get_dataset_record_batches(
103+
sample_records_iter(total_records), batch_size=batch_size
108104
)
109105
)
110106
assert len(batches) == math.ceil(total_records / batch_size)
111107

112108

113-
def test_dataset_write_to_multiple_locations_raise_error(small_records_iter):
109+
def test_dataset_write_to_multiple_locations_raise_error(sample_records_iter):
114110
timdex_dataset = TIMDEXDataset(
115111
location=["/path/to/records-1.parquet", "/path/to/records-2.parquet"]
116112
)
117113
with pytest.raises(
118114
TypeError,
119115
match="Dataset location must be the root of a single dataset for writing",
120116
):
121-
timdex_dataset.write(small_records_iter(10))
117+
timdex_dataset.write(sample_records_iter(10))
122118

123119

124120
def test_dataset_write_mixin_partition_values_used(
125-
new_temp_dataset, small_records_iter_without_partitions
121+
new_dataset, sample_records_iter_without_partitions
126122
):
127123
partition_values = {
128124
"source": "alma",
@@ -131,14 +127,14 @@ def test_dataset_write_mixin_partition_values_used(
131127
"action": "index",
132128
"run_id": "000-111-aaa-bbb",
133129
}
134-
_written_files = new_temp_dataset.write(
135-
small_records_iter_without_partitions(10),
130+
_written_files = new_dataset.write(
131+
sample_records_iter_without_partitions(10),
136132
partition_values=partition_values,
137133
)
138-
new_temp_dataset.reload()
134+
new_dataset.reload()
139135

140136
# load as pandas dataframe and assert column values
141-
df = new_temp_dataset.dataset.to_table().to_pandas()
137+
df = new_dataset.dataset.to_table().to_pandas()
142138
row = df.iloc[0]
143139
assert row.source == partition_values["source"]
144140
assert row.run_date == datetime.date(2024, 12, 1)
@@ -148,10 +144,10 @@ def test_dataset_write_mixin_partition_values_used(
148144

149145

150146
def test_dataset_write_schema_partitions_correctly_ordered(
151-
new_temp_dataset, small_records_iter
147+
new_dataset, sample_records_iter
152148
):
153-
written_files = new_temp_dataset.write(
154-
small_records_iter(10),
149+
written_files = new_dataset.write(
150+
sample_records_iter(10),
155151
partition_values={
156152
"source": "alma",
157153
"run_date": "2024-12-01",
@@ -167,13 +163,13 @@ def test_dataset_write_schema_partitions_correctly_ordered(
167163
)
168164

169165

170-
def test_dataset_write_schema_applied_to_dataset(new_temp_dataset, small_records_iter):
171-
new_temp_dataset.write(small_records_iter(10))
166+
def test_dataset_write_schema_applied_to_dataset(new_dataset, sample_records_iter):
167+
new_dataset.write(sample_records_iter(10))
172168

173169
# manually load dataset to confirm schema without TIMDEXDataset projecting schema
174170
# during load
175171
dataset = ds.dataset(
176-
new_temp_dataset.location,
172+
new_dataset.location,
177173
format="parquet",
178174
partitioning="hive",
179175
)

0 commit comments

Comments (0)