Skip to content

Commit fd3436d

Browse files
Fallback language parameter
1 parent 4c4ecbf commit fd3436d

3 files changed

Lines changed: 13 additions & 8 deletions

File tree

main.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ async def get_textified_wd(
5757
format: str = 'json',
5858
external_ids: bool = True,
5959
references: bool = False,
60-
all_ranks: bool = False
60+
all_ranks: bool = False,
61+
fallback_lang: str = 'en'
6162
):
6263
"""
6364
Retrieve a Wikidata item with all labels or textual representations for an LLM.
@@ -70,6 +71,7 @@ async def get_textified_wd(
7071
external_ids (bool): If True, includes external IDs in the response.
7172
all_ranks (bool): If True, includes statements of all ranks (preferred, normal, deprecated).
7273
references (bool): If True, includes references in the response. (only available in JSON format)
74+
fallback_lang (str): The fallback language code if the preferred language is not available.
7375
7476
Returns:
7577
list: A list of dictionaries containing QIDs and the similarity scores.
@@ -101,7 +103,8 @@ async def get_textified_wd(
101103
external_ids=external_ids,
102104
all_ranks=all_ranks,
103105
references=references,
104-
filter_pids=filter_pids
106+
filter_pids=filter_pids,
107+
fallback_lang=fallback_lang
105108
)
106109

107110
if not entity:

src/WikidataLabel.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,9 @@ def __str__(self):
307307
return self.factory.get_label(self.qid)
308308

309309
class LazyLabelFactory:
310-
def __init__(self, lang='en'):
310+
def __init__(self, lang='en', fallback_lang='en'):
311311
self.lang = lang
312+
self.fallback_lang = fallback_lang
312313
self._pending_ids = set()
313314
self._resolved_labels = {}
314315

@@ -325,7 +326,7 @@ def resolve_all(self):
325326

326327
def get_label(self, qid: str) -> str:
327328
label_dict = self._resolved_labels.get(qid, {})
328-
label = WikidataLabel.get_lang_val(label_dict, lang=self.lang, fallback_lang='en')
329+
label = WikidataLabel.get_lang_val(label_dict, lang=self.lang, fallback_lang=self.fallback_lang)
329330
return label
330331

331332
def set_lang(self, lang: str):

src/WikidataTextifier.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -474,13 +474,14 @@ def from_wd(cls,
474474
external_ids: bool = True,
475475
all_ranks: bool = False,
476476
references: bool = False,
477-
filter_pids: list[str] | None = None):
477+
filter_pids: list[str] | None = None,
478+
fallback_lang: str = 'en'):
478479

479480
if 'labels' not in entity_dict:
480481
return None
481482

482-
label = WikidataLabel.get_lang_val(entity_dict['labels'], lang, fallback_lang='en')
483-
description = WikidataLabel.get_lang_val(entity_dict['descriptions'], lang, fallback_lang='en')
483+
label = WikidataLabel.get_lang_val(entity_dict['labels'], lang, fallback_lang=fallback_lang)
484+
description = WikidataLabel.get_lang_val(entity_dict['descriptions'], lang, fallback_lang=fallback_lang)
484485

485486
aliases = entity_dict['aliases'].get(lang, []) + \
486487
entity_dict['aliases'].get('mul', [])
@@ -489,7 +490,7 @@ def from_wd(cls,
489490
else alias \
490491
for alias in aliases]))
491492

492-
lazylabel = LazyLabelFactory(lang=lang)
493+
lazylabel = LazyLabelFactory(lang=lang, fallback_lang=fallback_lang)
493494

494495
claims = entity_dict.get('claims', {})
495496
if filter_pids:

0 commit comments

Comments
 (0)