Skip to content

Commit af66815

Browse files
Add references in JSON format
1 parent 254a027 commit af66815

3 files changed

Lines changed: 30310 additions & 450 deletions

File tree

main.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,24 @@
4949
async def property_query_route(
5050
request: Request,
5151
id: str = Query(..., examples="Q42"),
52+
pid: str = Query(None, examples="P31,P279"),
5253
lang: str = 'en',
5354
format: str = 'json',
54-
external_ids: bool = True
55+
external_ids: bool = True,
56+
references: bool = False,
57+
all_ranks: bool = False
5558
):
5659
"""
5760
Retrieve a Wikidata item with all labels or textual representations for an LLM.
5861
5962
Args:
6063
id (str): The Wikidata item ID (e.g., "Q42").
64+
pid (str): Comma-separated list of property IDs to filter claims (e.g., "P31,P279").
6165
format (str): The format of the response: one of 'json', 'text', or 'triplet'.
6266
lang (str): The language code for labels (default is 'en').
6367
external_ids (bool): If True, includes external IDs in the response.
68+
all_ranks (bool): If True, includes statements of all ranks (preferred, normal, deprecated).
69+
references (bool): If True, includes references in the response (only available in JSON format).
6470
6571
Returns:
6672
list: A list of dictionaries containing QIDs and the similarity scores.
@@ -70,10 +76,17 @@ async def property_query_route(
7076
return HTTPException(status_code=422, detail=response)
7177

7278
try:
79+
filter_pids = None
80+
if pid:
81+
filter_pids = [p.strip() for p in pid.split(',')]
82+
7383
entity = WikidataEntity.from_id(
7484
id,
7585
lang=lang,
76-
external_ids=external_ids
86+
external_ids=external_ids,
87+
all_ranks=all_ranks,
88+
references=references,
89+
filter_pids=filter_pids
7790
)
7891

7992
if not entity:

src/WikidataTextifier.py

Lines changed: 98 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@ def from_raw(cls, value, lazylabel):
151151
)
152152

153153
def __str__(self):
154-
return f"{self.amount} {str(self.unit) or ''}".strip()
154+
if self.unit_id:
155+
return f"{self.amount} {str(self.unit)}"
156+
return self.amount
155157

156158
def to_json(self):
157159
if self.unit_id:
@@ -167,15 +169,19 @@ def to_json(self):
167169
class WikidataClaimValue:
168170
claim: "WikidataClaim"
169171
value: "WikidataEntity | WikidataQuantity | WikidataTime | WikidataCoordinates | WikidataText | None"
170-
qualifiers: list["WikidataClaim"]
172+
qualifiers: list["WikidataClaim"] | None
173+
references: list[list["WikidataClaim"]] | None
174+
rank: str | None # 'preferred', 'normal', 'deprecated'
171175

172176
@classmethod
173-
def from_raw(cls, claim, value, qualifiers, lazylabel):
177+
def from_raw(cls, claim, value, qualifiers, references, rank, lazylabel):
174178
if value.get('value') is None:
175179
return cls(
176180
claim=claim,
177181
value=None,
178-
qualifiers=[]
182+
qualifiers=None,
183+
references=None,
184+
rank=rank
179185
)
180186
elif value.get('type') == 'wikibase-entityid':
181187
id = value['value']['id']
@@ -227,10 +233,34 @@ def from_raw(cls, claim, value, qualifiers, lazylabel):
227233
)
228234
)
229235

236+
# Setup the references
237+
parsed_references = []
238+
for reference in references:
239+
reference_claims = reference.get('snaks', {})
240+
parsed_references_sublist = []
241+
for pid, reference_claim in reference_claims.items():
242+
parsed_references_sublist.append(
243+
WikidataClaim.from_raw(
244+
subject=None,
245+
property=WikidataEntity(
246+
id=pid,
247+
label=lazylabel.create(pid),
248+
description=None,
249+
aliases=[],
250+
claims=[]
251+
),
252+
claim=reference_claim,
253+
lazylabel=lazylabel
254+
)
255+
)
256+
parsed_references.append(parsed_references_sublist)
257+
230258
return cls(
231259
claim=claim,
232260
value=parsed_value,
233-
qualifiers=parsed_qualifiers
261+
qualifiers=parsed_qualifiers,
262+
references=parsed_references,
263+
rank=rank
234264
)
235265

236266
def __str__(self):
@@ -239,8 +269,13 @@ def __str__(self):
239269

240270
string = str(self.value)
241271
qualifiers = [str(q) for q in self.qualifiers if q]
272+
273+
if self.rank == 'deprecated':
274+
string += " [deprecated]"
275+
242276
if len(qualifiers) > 0:
243277
string += f" ({', '.join(qualifiers)})"
278+
244279
return string
245280

246281
def __bool__(self):
@@ -258,17 +293,24 @@ def to_json(self):
258293
'label': str(value['label'])
259294
}
260295

261-
qualifiers = [q.to_json() for q in self.qualifiers if q]
262-
if len(qualifiers) == 0:
263-
return {
264-
"value": value
265-
}
266-
267-
return {
268-
"value": value,
269-
"qualifiers": [q.to_json() for q in self.qualifiers if q]
296+
return_dict = {
297+
"value": value
270298
}
271299

300+
if self.qualifiers:
301+
qualifiers = [q.to_json() for q in self.qualifiers if q]
302+
return_dict["qualifiers"] = qualifiers
303+
304+
if self.references:
305+
references = [[r.to_json() for r in ref if r] \
306+
for ref in self.references]
307+
return_dict["references"] = references
308+
309+
if self.rank:
310+
return_dict["rank"] = self.rank
311+
312+
return return_dict
313+
272314
def to_triplet(self):
273315
if not self:
274316
return ''
@@ -277,6 +319,9 @@ def to_triplet(self):
277319
if isinstance(self.value, WikidataEntity):
278320
string = f"{str(self.value.label)} ({self.value.id})"
279321

322+
if self.rank == 'deprecated':
323+
string += " [deprecated]"
324+
280325
qualifiers = [q.to_triplet() for q in self.qualifiers if q]
281326
if len(qualifiers) > 0:
282327
string += f" | {' | '.join(qualifiers)})"
@@ -291,7 +336,8 @@ class WikidataClaim:
291336
datatype: str
292337

293338
@classmethod
294-
def from_raw(cls, subject, property, claim, lazylabel, external_ids=True):
339+
def from_raw(cls, subject, property, claim, lazylabel,
340+
external_ids=True, references=False, all_ranks=False):
295341
if not claim:
296342
return cls(
297343
subject=subject,
@@ -319,24 +365,35 @@ def from_raw(cls, subject, property, claim, lazylabel, external_ids=True):
319365

320366
# Unless all_ranks is set, include only preferred-rank claims, or normal-rank claims when no preferred one is found; claims without a rank are always included.
321367
for i in range(len(claim)):
368+
322369
if 'rank' not in claim[i]:
323-
claim[i]['rank'] = 'normal'
370+
# For qualifiers and references, rank is not defined
371+
claim[i]['rank'] = None
372+
claim[i]['include'] = True
324373

325-
is_rank_normal = (claim[i].get('rank') == 'normal')
326-
is_rank_preferred = (claim[i].get('rank') == 'preferred')
327-
rank_normal_condition = is_rank_normal and \
328-
(not rank_preferred_found)
329-
rank_preferred_condition = is_rank_preferred and \
330-
rank_preferred_found
331-
claim[i]['include'] = rank_normal_condition or \
332-
rank_preferred_condition
374+
else:
375+
# Skip the filtering if all ranks are requested
376+
if all_ranks:
377+
claim[i]['include'] = True
378+
continue
379+
380+
is_rank_normal = (claim[i].get('rank') == 'normal')
381+
is_rank_preferred = (claim[i].get('rank') == 'preferred')
382+
rank_normal_condition = is_rank_normal and \
383+
(not rank_preferred_found)
384+
rank_preferred_condition = is_rank_preferred and \
385+
rank_preferred_found
386+
claim[i]['include'] = rank_normal_condition or \
387+
rank_preferred_condition
333388

334389
values = [
335390
WikidataClaimValue.from_raw(
336391
claim=None,
337392
value=value.get('datavalue', {}),
338393
qualifiers=value.get('qualifiers', {}),
339-
lazylabel=lazylabel
394+
references=value.get('references', []) if references else [],
395+
lazylabel=lazylabel,
396+
rank=value.get('rank', None)
340397
) for value in claim if value['include']
341398
]
342399

@@ -401,7 +458,13 @@ class WikidataEntity:
401458
claims: list[WikidataClaim]
402459

403460
@classmethod
404-
def from_id(cls, id: str, lang: str = 'en', external_ids: bool = True):
461+
def from_id(cls, id: str,
462+
lang: str = 'en',
463+
external_ids: bool = True,
464+
all_ranks: bool = False,
465+
references: bool = False,
466+
filter_pids: list[str] | None = None):
467+
405468
entity_dict = get_wikidata_entities_by_ids(id)
406469
if id not in entity_dict:
407470
raise ValueError(f"ID not found.")
@@ -419,6 +482,11 @@ def from_id(cls, id: str, lang: str = 'en', external_ids: bool = True):
419482

420483
lazylabel = LazyLabelFactory(lang=lang)
421484

485+
claims = entity_dict.get('claims', {})
486+
if filter_pids:
487+
claims = {pid: claim for pid, claim in claims.items() \
488+
if pid in filter_pids}
489+
422490
claims = [
423491
WikidataClaim.from_raw(
424492
subject=None,
@@ -431,8 +499,10 @@ def from_id(cls, id: str, lang: str = 'en', external_ids: bool = True):
431499
),
432500
claim=claim,
433501
lazylabel=lazylabel,
434-
external_ids=external_ids
435-
) for pid, claim in entity_dict.get('claims', {}).items()
502+
external_ids=external_ids,
503+
references=references,
504+
all_ranks=all_ranks
505+
) for pid, claim in claims.items()
436506
]
437507

438508
entity = cls(

0 commit comments

Comments
 (0)