@@ -58,3 +58,193 @@ def test_dataset_metadata_structure_is_idempotent(timdex_dataset_metadata):
5858 assert os .path .exists (timdex_dataset_metadata .metadata_root )
5959 end_file_count = glob .glob (f"{ timdex_dataset_metadata .metadata_root } /**/*" )
6060 assert start_file_count == end_file_count
61+
62+
def test_tdm_views_created_on_init(timdex_dataset_metadata):
    """Check that the expected views are listed on the fixture's connection."""
    view_listing = timdex_dataset_metadata.conn.query(
        """select table_name from information_schema.tables where table_type = 'VIEW';"""
    ).to_df()
    found = set(view_listing["table_name"])

    # Subset check: additional views are tolerated, missing ones are not.
    required = {"append_deltas", "records", "current_records"}
    assert required.issubset(found)
71+
72+
def test_tdm_records_view_structure(timdex_dataset_metadata):
    """Check the exact column set produced by ``select *`` on ``records``."""
    wanted = {
        "timdex_record_id",
        "source",
        "run_date",
        "run_type",
        "action",
        "run_id",
        "run_record_offset",
        "run_timestamp",
        "filename",
    }

    # A single row is enough: only the column names are inspected.
    sample = timdex_dataset_metadata.conn.query(
        """select * from records limit 1;"""
    ).to_df()

    # Compared as sets, so column order does not matter.
    assert wanted == set(sample.columns)
89+
90+
def test_tdm_current_records_view_structure(timdex_dataset_metadata):
    """Check the exact column set produced by ``select *`` on ``current_records``."""
    wanted = {
        "timdex_record_id",
        "source",
        "run_date",
        "run_type",
        "action",
        "run_id",
        "run_record_offset",
        "run_timestamp",
        "filename",
    }

    # A single row is enough: only the column names are inspected.
    sample = timdex_dataset_metadata.conn.query(
        """select * from current_records limit 1;"""
    ).to_df()

    # Compared as sets, so column order does not matter.
    assert wanted == set(sample.columns)
107+
108+
def test_tdm_append_deltas_view_empty_structure(timdex_dataset_metadata):
    """Check that ``append_deltas`` has the expected columns and no rows."""
    wanted = {
        "timdex_record_id",
        "source",
        "run_date",
        "run_type",
        "action",
        "run_id",
        "run_record_offset",
        "run_timestamp",
        "filename",
    }

    deltas = timdex_dataset_metadata.conn.query(
        """select * from append_deltas;"""
    ).to_df()

    # Column names are checked first, then the row count.
    assert wanted == set(deltas.columns)
    assert len(deltas) == 0
126+
127+
def test_tdm_records_count_property(timdex_dataset_metadata):
    """Check ``records_count`` is positive and equals ``count(*)`` on ``records``."""
    assert timdex_dataset_metadata.records_count > 0

    # Count the rows directly through the connection for comparison.
    count_row = timdex_dataset_metadata.conn.query(
        """select count(*) from records;"""
    ).fetchone()
    counted = count_row[0]

    assert timdex_dataset_metadata.records_count == counted
135+
136+
def test_tdm_current_records_count_property(timdex_dataset_metadata):
    """Check ``current_records_count`` is positive and equals a direct ``count(*)``."""
    assert timdex_dataset_metadata.current_records_count > 0

    # Count the rows directly through the connection for comparison.
    count_row = timdex_dataset_metadata.conn.query(
        """select count(*) from current_records;"""
    ).fetchone()
    counted = count_row[0]

    assert timdex_dataset_metadata.current_records_count == counted
144+
145+
def test_tdm_append_deltas_count_property_empty(timdex_dataset_metadata):
    """Check that ``append_deltas_count`` is zero for this fixture."""
    delta_total = timdex_dataset_metadata.append_deltas_count
    assert delta_total == 0
148+
149+
def test_tdm_records_equals_static_without_deltas(timdex_dataset_metadata):
    """Check ``records`` and ``static_db.records`` hold the same number of rows."""
    conn = timdex_dataset_metadata.conn

    static_row = conn.query("""select count(*) from static_db.records;""").fetchone()
    static_total = static_row[0]

    view_row = conn.query("""select count(*) from records;""").fetchone()
    view_total = view_row[0]

    assert static_total == view_total
158+
159+
def test_tdm_current_records_filtering_logic(timdex_dataset_metadata):
    """Check ``current_records_count`` is positive and at most ``records_count``."""
    n_current = timdex_dataset_metadata.current_records_count
    n_all = timdex_dataset_metadata.records_count

    # The current count must not exceed the total, and must be non-empty.
    assert n_all >= n_current
    assert n_current > 0
166+
167+
def test_tdm_views_with_append_deltas(timdex_dataset_metadata_with_deltas):
    """Check that the expected views are listed on the with-deltas fixture."""
    view_listing = timdex_dataset_metadata_with_deltas.conn.query(
        """select table_name from information_schema.tables where table_type = 'VIEW';"""
    ).to_df()
    found = set(view_listing["table_name"])

    # Subset check: additional views are tolerated, missing ones are not.
    required = {"append_deltas", "records", "current_records"}
    assert required <= found
176+
177+
def test_tdm_append_deltas_view_has_data(timdex_dataset_metadata_with_deltas):
    """Check that ``append_deltas_count`` is positive for the with-deltas fixture."""
    delta_total = timdex_dataset_metadata_with_deltas.append_deltas_count
    assert delta_total > 0
181+
182+
def test_tdm_records_includes_deltas(timdex_dataset_metadata_with_deltas):
    """Check ``records_count`` equals the static row count plus the deltas count."""
    tdm = timdex_dataset_metadata_with_deltas

    static_row = tdm.conn.query(
        """select count(*) from static_db.records;"""
    ).fetchone()
    static_total = static_row[0]
    delta_total = tdm.append_deltas_count
    combined_total = tdm.records_count

    # The combined count is the exact sum and strictly larger than the static part.
    assert combined_total == static_total + delta_total
    assert combined_total > static_total
192+
193+
def test_tdm_current_records_with_deltas_logic(timdex_dataset_metadata_with_deltas):
    """Check the count bounds and id uniqueness of ``current_records``.

    ``current_records_count`` must be positive and no larger than
    ``records_count``, and the number of distinct ``timdex_record_id`` values
    selected from ``current_records`` must equal ``current_records_count``.
    """
    tdm = timdex_dataset_metadata_with_deltas
    n_current = tdm.current_records_count
    n_all = tdm.records_count

    assert n_all >= n_current
    assert n_current > 0

    # Pull every id from the view and count the distinct ones.
    id_frame = tdm.conn.query(
        """select timdex_record_id from current_records;"""
    ).to_df()
    distinct_ids = id_frame["timdex_record_id"].unique()

    assert len(distinct_ids) == n_current
208+
209+
def test_tdm_current_records_most_recent_version(timdex_dataset_metadata_with_deltas):
    """Check that ``current_records`` holds the newest version of a record.

    Picks one ``timdex_record_id`` that appears more than once in ``records``,
    looks up its row with the greatest ``run_timestamp``, and asserts that
    ``current_records`` contains exactly one row for that id carrying the same
    ``run_timestamp`` and ``run_id``.

    NOTE(review): when no id has more than one version the test returns
    without asserting anything, so it passes vacuously; confirm the fixture
    always provides at least one multi-version record.
    """
    conn = timdex_dataset_metadata_with_deltas.conn

    # Find any single record id that has multiple versions.
    multi_version_records = conn.query(
        """
        select timdex_record_id, count(*) as version_count
        from records
        group by timdex_record_id
        having count(*) > 1
        limit 1;
        """
    ).to_df()

    if len(multi_version_records) == 0:
        return

    record_id = multi_version_records.iloc[0]["timdex_record_id"]

    # The quoted literal must be exactly the id: any padding inside the quotes
    # becomes part of the compared value and the lookup matches nothing.
    most_recent = conn.query(
        f"""
        select run_timestamp, run_id
        from records
        where timdex_record_id = '{record_id}'
        order by run_timestamp desc
        limit 1;
        """
    ).to_df()

    # Fetch what current_records reports for the same id.
    current_version = conn.query(
        f"""
        select run_timestamp, run_id
        from current_records
        where timdex_record_id = '{record_id}';
        """
    ).to_df()

    assert len(current_version) == 1
    assert (
        current_version.iloc[0]["run_timestamp"]
        == most_recent.iloc[0]["run_timestamp"]
    )
    assert current_version.iloc[0]["run_id"] == most_recent.iloc[0]["run_id"]
0 commit comments