22
33from collections import defaultdict
44from typing import Any , DefaultDict , Dict , List , Optional , Set
5+ import requests
56
67from rdflib import Graph , Literal , Namespace , URIRef
78from rdflib .namespace import RDF , RDFS
89
910from ..WikidataLabel import WikidataLabel , LazyLabelFactory
10- from ..WikidataTextifier import (
11+ from ..Textifier . WikidataTextifier import (
1112 WikidataClaim ,
1213 WikidataClaimValue ,
1314 WikidataCoordinates ,
@@ -80,6 +81,7 @@ def normalize(
8081 external_ids : bool = True ,
8182 references : bool = False ,
8283 all_ranks : bool = False ,
84+ qualifiers : bool = True ,
8385 filter_pids : List [str ] = []
8486 ) -> WikidataEntity :
8587 # Preload labels found inside TTL so LazyLabelFactory can avoid lookups.
@@ -107,6 +109,7 @@ def normalize(
107109 external_ids = external_ids ,
108110 include_references = references ,
109111 all_ranks = all_ranks ,
112+ qualifiers = qualifiers ,
110113 filter_pids = filter_pids
111114 )
112115
@@ -124,6 +127,7 @@ def normalize(
124127 pid = pid ,
125128 statements = statements ,
126129 include_references = references ,
130+ qualifiers = qualifiers ,
127131 )
128132 for pid , statements in claims_dict .items ()
129133 if statements
@@ -162,6 +166,7 @@ def _claims_for_subject(
162166 external_ids : bool ,
163167 include_references : bool ,
164168 all_ranks : bool ,
169+ qualifiers : bool ,
165170 filter_pids : List [str ] = []
166171 ) -> Dict [str , List [Dict [str , Any ]]]:
167172 """Return mapping: pid -> list of statement dicts."""
@@ -195,7 +200,7 @@ def _claims_for_subject(
195200 is_special = self ._is_special_main_value (obj , pid )
196201 main = None if is_special else self ._main_value (obj , pid , datatype )
197202
198- qualifiers = self ._qualifiers (obj )
203+ qualifiers_data = self ._qualifiers (obj ) if qualifiers else {}
199204 refs = self ._references (obj ) if include_references else []
200205
201206 out [pid ].append (
@@ -204,7 +209,7 @@ def _claims_for_subject(
204209 "datatype" : datatype ,
205210 "rank" : rank ,
206211 "main" : main ,
207- "qualifiers" : qualifiers if qualifiers else {},
212+ "qualifiers" : qualifiers_data if qualifiers_data else {},
208213 "references" : refs if refs else [],
209214 "is_special_value" : is_special ,
210215 }
@@ -273,6 +278,7 @@ def _build_claim_object(
273278 pid : str ,
274279 statements : List [Dict [str , Any ]],
275280 include_references : bool ,
281+ qualifiers : bool = True ,
276282 ) -> WikidataClaim :
277283 prop_ent = WikidataEntity (
278284 id = pid ,
@@ -291,15 +297,17 @@ def _build_claim_object(
291297 print (f"{ pid } : { st .get ('main' )} (special: { st .get ('is_special_value' , False )} )" )
292298
293299 value_obj = self ._to_value_object (st ["datatype" ], st .get ("main" ))
294-
295- qualifiers_obj : List [WikidataClaim ] = [
296- self ._build_snak_claim (
297- pid = qpid ,
298- datatype = self ._prop_datatype (qpid ),
299- snaks = qsnaks ,
300- )
301- for qpid , qsnaks in (st .get ("qualifiers" ) or {}).items ()
302- ]
300+ qualifiers_obj : List [WikidataClaim ] = []
301+
302+ if qualifiers :
303+ qualifiers_obj = [
304+ self ._build_snak_claim (
305+ pid = qpid ,
306+ datatype = self ._prop_datatype (qpid ),
307+ snaks = qsnaks ,
308+ )
309+ for qpid , qsnaks in (st .get ("qualifiers" ) or {}).items ()
310+ ]
303311
304312 refs_obj : List [List [WikidataClaim ]] = []
305313 if include_references :
@@ -409,7 +417,7 @@ def _to_value_object(self, datatype: str, parsed: Any) -> Any:
409417 parsed ,
410418 self .lang ,
411419 )
412- except (ValueError , TypeError ) as e :
420+ except (ValueError , TypeError , KeyError , requests . RequestException ) as e :
413421 if self .debug :
414422 print (f"Warning: Failed to parse time value { time_val } : { e } " )
415423 return None
@@ -436,7 +444,7 @@ def _to_value_object(self, datatype: str, parsed: Any) -> Any:
436444
437445 try :
438446 string_val = wikidata_geolocation_to_text (parsed , self .lang )
439- except (ValueError , TypeError ) as e :
447+ except (ValueError , TypeError , KeyError , requests . RequestException ) as e :
440448 if self .debug :
441449 print (f"Warning: Failed to parse coordinates ({ lat } , { lon } ): { e } " )
442450 return None
0 commit comments