|
1 | 1 | # ruff: noqa: S105, S106, SLF001, PLR2004, PD901, D209, D205 |
2 | 2 | import math |
3 | 3 | import os |
4 | | -import re |
5 | | -from datetime import date |
6 | 4 | from unittest.mock import patch |
7 | 5 |
|
8 | 6 | import pyarrow.dataset as ds |
|
14 | 12 | TIMDEX_DATASET_SCHEMA, |
15 | 13 | TIMDEXDataset, |
16 | 14 | ) |
17 | | -from timdex_dataset_api.record import DatasetRecord |
18 | | - |
19 | | - |
20 | | -def test_dataset_record_init(): |
21 | | - values = { |
22 | | - "timdex_record_id": "alma:123", |
23 | | - "source_record": b"<record><title>Hello World.</title></record>", |
24 | | - "transformed_record": b"""{"title":["Hello World."]}""", |
25 | | - "source": "libguides", |
26 | | - "run_date": "2024-12-01", |
27 | | - "run_type": "full", |
28 | | - "action": "index", |
29 | | - "run_id": "000-111-aaa-bbb", |
30 | | - } |
31 | | - record = DatasetRecord(**values) |
32 | | - |
33 | | - assert record |
34 | | - assert (record.year, record.month, record.day) == ( |
35 | | - "2024", |
36 | | - "12", |
37 | | - "01", |
38 | | - ) |
39 | | - |
40 | | - |
41 | | -def test_dataset_record_init_with_invalid_run_date_raise_error(): |
42 | | - values = { |
43 | | - "timdex_record_id": "alma:123", |
44 | | - "source_record": b"<record><title>Hello World.</title></record>", |
45 | | - "transformed_record": b"""{"title":["Hello World."]}""", |
46 | | - "source": "libguides", |
47 | | - "run_date": "-12-01", |
48 | | - "run_type": "full", |
49 | | - "action": "index", |
50 | | - "run_id": "000-111-aaa-bbb", |
51 | | - } |
52 | | - |
53 | | - with pytest.raises( |
54 | | - ValueError, match=re.escape("time data '-12-01' does not match format '%Y-%m-%d'") |
55 | | - ): |
56 | | - DatasetRecord(**values) |
57 | | - |
58 | | - |
59 | | -def test_dataset_record_serialization(): |
60 | | - values = { |
61 | | - "timdex_record_id": "alma:123", |
62 | | - "source_record": b"<record><title>Hello World.</title></record>", |
63 | | - "transformed_record": b"""{"title":["Hello World."]}""", |
64 | | - "source": "libguides", |
65 | | - "run_date": "2024-12-01", |
66 | | - "run_type": "full", |
67 | | - "action": "index", |
68 | | - "run_id": "abc123", |
69 | | - } |
70 | | - dataset_record = DatasetRecord(**values) |
71 | | - |
72 | | - assert dataset_record.to_dict() == { |
73 | | - "timdex_record_id": "alma:123", |
74 | | - "source_record": b"<record><title>Hello World.</title></record>", |
75 | | - "transformed_record": b"""{"title":["Hello World."]}""", |
76 | | - "source": "libguides", |
77 | | - "run_date": date(2024, 12, 1), |
78 | | - "run_type": "full", |
79 | | - "action": "index", |
80 | | - "run_id": "abc123", |
81 | | - "year": "2024", |
82 | | - "month": "12", |
83 | | - "day": "01", |
84 | | - } |
85 | 15 |
|
86 | 16 |
|
87 | 17 | def test_dataset_write_records_to_new_local_dataset( |
@@ -115,7 +45,7 @@ def test_dataset_write_record_batches_uses_batch_size( |
115 | 45 | total_records = 101 |
116 | 46 | batch_size = 50 |
117 | 47 | batches = list( |
118 | | - new_local_dataset.get_dataset_record_batches( |
| 48 | + new_local_dataset.create_record_batches( |
119 | 49 | sample_records_iter(total_records), batch_size=batch_size |
120 | 50 | ) |
121 | 51 | ) |
|
0 commit comments