Skip to content

Commit 8c345d2

Browse files
committed
slight rework of testing filenames
1 parent 9e807e9 commit 8c345d2

3 files changed

Lines changed: 76 additions & 83 deletions

File tree

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,9 @@
44
import pyarrow as pa
55
import pytest
66

7-
DATASET_COLUMNS_SET = {
8-
"timdex_record_id",
9-
"source_record",
10-
"transformed_record",
11-
"source",
12-
"run_date",
13-
"run_type",
14-
"run_id",
15-
"action",
16-
"year",
17-
"month",
18-
"day",
19-
}
7+
from timdex_dataset_api.dataset import TIMDEX_DATASET_SCHEMA
8+
9+
# Column names are taken from the dataset schema itself rather than being
# hard-coded here, so this set follows any change made to the schema.
DATASET_COLUMNS_SET = {*TIMDEX_DATASET_SCHEMA.names}
2010

2111

2212
def test_read_batches_yields_pyarrow_record_batches(fixed_local_dataset):

tests/test_records.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import re
2+
from datetime import date
3+
4+
import pytest
5+
6+
from timdex_dataset_api.record import DatasetRecord
7+
8+
9+
def test_dataset_record_init_with_valid_run_date_parses_year_month_day():
    """A record built with run_date "2024-12-01" exposes year, month and day strings."""
    record = DatasetRecord(
        timdex_record_id="alma:123",
        source_record=b"<record><title>Hello World.</title></record>",
        transformed_record=b"""{"title":["Hello World."]}""",
        source="libguides",
        run_date="2024-12-01",
        run_type="full",
        action="index",
        run_id="000-111-aaa-bbb",
    )
    expected_date_parts = ("2024", "12", "01")

    assert record
    assert (record.year, record.month, record.day) == expected_date_parts
28+
29+
30+
def test_dataset_record_init_with_invalid_run_date_raise_error():
    """A run_date of "-12-01" (no year) makes DatasetRecord raise ValueError."""
    # The message contains regex metacharacters, so escape it before it is
    # used as the pattern for pytest's ``match``.
    expected_message = re.escape(
        "time data '-12-01' does not match format '%Y-%m-%d'"
    )
    init_kwargs = {
        "timdex_record_id": "alma:123",
        "source_record": b"<record><title>Hello World.</title></record>",
        "transformed_record": b"""{"title":["Hello World."]}""",
        "source": "libguides",
        "run_date": "-12-01",
        "run_type": "full",
        "action": "index",
        "run_id": "000-111-aaa-bbb",
    }

    with pytest.raises(ValueError, match=expected_message):
        DatasetRecord(**init_kwargs)
46+
47+
48+
def test_dataset_record_serialization():
    """to_dict() returns the init values with a date run_date plus year/month/day."""
    init_kwargs = {
        "timdex_record_id": "alma:123",
        "source_record": b"<record><title>Hello World.</title></record>",
        "transformed_record": b"""{"title":["Hello World."]}""",
        "source": "libguides",
        "run_date": "2024-12-01",
        "run_type": "full",
        "action": "index",
        "run_id": "abc123",
    }
    # Expected output: every input field unchanged, except run_date becomes a
    # ``date`` object, and the three partition strings are added.
    expected = {
        **init_kwargs,
        "run_date": date(2024, 12, 1),
        "year": "2024",
        "month": "12",
        "day": "01",
    }

    assert DatasetRecord(**init_kwargs).to_dict() == expected
Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# ruff: noqa: S105, S106, SLF001, PLR2004, PD901, D209, D205
22
import math
33
import os
4-
import re
5-
from datetime import date
64
from unittest.mock import patch
75

86
import pyarrow.dataset as ds
@@ -14,74 +12,6 @@
1412
TIMDEX_DATASET_SCHEMA,
1513
TIMDEXDataset,
1614
)
17-
from timdex_dataset_api.record import DatasetRecord
18-
19-
20-
def test_dataset_record_init():
21-
values = {
22-
"timdex_record_id": "alma:123",
23-
"source_record": b"<record><title>Hello World.</title></record>",
24-
"transformed_record": b"""{"title":["Hello World."]}""",
25-
"source": "libguides",
26-
"run_date": "2024-12-01",
27-
"run_type": "full",
28-
"action": "index",
29-
"run_id": "000-111-aaa-bbb",
30-
}
31-
record = DatasetRecord(**values)
32-
33-
assert record
34-
assert (record.year, record.month, record.day) == (
35-
"2024",
36-
"12",
37-
"01",
38-
)
39-
40-
41-
def test_dataset_record_init_with_invalid_run_date_raise_error():
42-
values = {
43-
"timdex_record_id": "alma:123",
44-
"source_record": b"<record><title>Hello World.</title></record>",
45-
"transformed_record": b"""{"title":["Hello World."]}""",
46-
"source": "libguides",
47-
"run_date": "-12-01",
48-
"run_type": "full",
49-
"action": "index",
50-
"run_id": "000-111-aaa-bbb",
51-
}
52-
53-
with pytest.raises(
54-
ValueError, match=re.escape("time data '-12-01' does not match format '%Y-%m-%d'")
55-
):
56-
DatasetRecord(**values)
57-
58-
59-
def test_dataset_record_serialization():
60-
values = {
61-
"timdex_record_id": "alma:123",
62-
"source_record": b"<record><title>Hello World.</title></record>",
63-
"transformed_record": b"""{"title":["Hello World."]}""",
64-
"source": "libguides",
65-
"run_date": "2024-12-01",
66-
"run_type": "full",
67-
"action": "index",
68-
"run_id": "abc123",
69-
}
70-
dataset_record = DatasetRecord(**values)
71-
72-
assert dataset_record.to_dict() == {
73-
"timdex_record_id": "alma:123",
74-
"source_record": b"<record><title>Hello World.</title></record>",
75-
"transformed_record": b"""{"title":["Hello World."]}""",
76-
"source": "libguides",
77-
"run_date": date(2024, 12, 1),
78-
"run_type": "full",
79-
"action": "index",
80-
"run_id": "abc123",
81-
"year": "2024",
82-
"month": "12",
83-
"day": "01",
84-
}
8515

8616

8717
def test_dataset_write_records_to_new_local_dataset(

0 commit comments

Comments
 (0)