Skip to content

Commit 67f75c5

Browse files
Bulk IDs request
1 parent bccbb13 commit 67f75c5

4 files changed

Lines changed: 87 additions & 31969 deletions

File tree

main.py

Lines changed: 44 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from src.WikidataTextifier import WikidataEntity
77
from src.WikidataLabel import WikidataLabel
8+
from src import utils
89

910
# Start Fastapi app
1011
app = FastAPI(
@@ -48,9 +49,9 @@
4849
},
4950
},
5051
)
51-
async def get_labels(
52+
async def get_textified_wd(
5253
request: Request, background_tasks: BackgroundTasks,
53-
id: str = Query(..., examples="Q42"),
54+
id: str = Query(..., examples="Q42,Q2"),
5455
pid: str = Query(None, examples="P31,P279"),
5556
lang: str = 'en',
5657
format: str = 'json',
@@ -73,40 +74,56 @@ async def get_labels(
7374
Returns:
7475
list: A list of dictionaries containing QIDs and the similarity scores.
7576
"""
76-
if not id:
77-
response = "ID is missing"
78-
return HTTPException(status_code=422, detail=response)
79-
8077
try:
78+
79+
if not id:
80+
response = "ID is missing"
81+
return HTTPException(status_code=422, detail=response)
82+
8183
filter_pids = None
8284
if pid:
8385
filter_pids = [p.strip() for p in pid.split(',')]
8486

85-
entity = WikidataEntity.from_id(
86-
id,
87-
lang=lang,
88-
external_ids=external_ids,
89-
all_ranks=all_ranks,
90-
references=references,
91-
filter_pids=filter_pids
92-
)
93-
94-
if not entity:
95-
response = "Item not found"
87+
qids = [q.strip() for q in id.split(',')]
88+
entity_dict = utils.get_wikidata_entities_by_ids(qids)
89+
90+
if not entity_dict:
91+
response = "ID not found"
9692
return HTTPException(status_code=404, detail=response)
9793

98-
if format == 'json':
99-
results = entity.to_json()
100-
elif format == 'triplet':
101-
results = entity.to_triplet()
102-
elif format == 'text':
103-
results = str(entity)
104-
else:
105-
response = "Invalid format specified"
106-
return HTTPException(status_code=422, detail=response)
94+
return_data = {}
95+
for id in qids:
96+
if id in entity_dict:
97+
entity = WikidataEntity.from_wd(
98+
entity_dict[id],
99+
id=id,
100+
lang=lang,
101+
external_ids=external_ids,
102+
all_ranks=all_ranks,
103+
references=references,
104+
filter_pids=filter_pids
105+
)
106+
107+
if format == 'text':
108+
results = str(entity)
109+
elif format == 'triplet':
110+
results = entity.to_triplet()
111+
else:
112+
results = entity.to_json()
113+
114+
return_data[id] = results
115+
else:
116+
return_data[id] = None
117+
118+
if len(qids) == 1:
119+
return_data = return_data[qids[0]]
120+
if not return_data:
121+
response = "Item not found"
122+
return HTTPException(status_code=404, detail=response)
107123

108124
background_tasks.add_task(WikidataLabel.delete_old_labels)
109-
return results
125+
return return_data
126+
110127
except Exception as e:
111128
traceback.print_exc()
112129
raise HTTPException(status_code=500, detail="Internal Server Error")

src/WikidataTextifier.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -467,18 +467,15 @@ class WikidataEntity:
467467
claims: list[WikidataClaim]
468468

469469
@classmethod
470-
def from_id(cls, id: str,
470+
def from_wd(cls,
471+
entity_dict: dict,
472+
id: str,
471473
lang: str = 'en',
472474
external_ids: bool = True,
473475
all_ranks: bool = False,
474476
references: bool = False,
475477
filter_pids: list[str] | None = None):
476478

477-
entity_dict = get_wikidata_entities_by_ids(id)
478-
if id not in entity_dict:
479-
raise ValueError(f"ID not found.")
480-
481-
entity_dict = entity_dict[id]
482479
if 'labels' not in entity_dict:
483480
return None
484481

@@ -487,7 +484,10 @@ def from_id(cls, id: str,
487484

488485
aliases = entity_dict['aliases'].get(lang, []) + \
489486
entity_dict['aliases'].get('mul', [])
490-
aliases = list(set([alias.get('value') for alias in aliases]))
487+
aliases = list(set([alias.get('value') \
488+
if isinstance(alias, dict) \
489+
else alias \
490+
for alias in aliases]))
491491

492492
lazylabel = LazyLabelFactory(lang=lang)
493493

src/utils.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,41 @@
88
import requests
99
from .WikidataLabel import WikidataLabel
1010

11+
def get_wikidata_entity_by_id(
    id,
    props='labels,descriptions,aliases,statements',
    timeout=30
):
    """
    Fetches a single Wikidata entity through the Wikibase REST API.

    Parameters:
    - id (str): A Wikidata entity ID (e.g., Q42, P31). IDs starting with 'P'
      are requested from the properties endpoint, all others from the items
      endpoint.
    - props (str): Comma-separated fields to retrieve; sent as the `_fields`
      query parameter.
    - timeout (float | tuple): Seconds to wait for the server before giving
      up, forwarded to `requests.get`. Defaults to 30.

    Returns:
    - dict: The decoded JSON body of the response (presumably a single entity
      object restricted to the requested fields; confirm against the REST
      API schema).

    Raises:
    - requests.HTTPError: If the server answers with an error status code.
    - requests.Timeout: If the server does not answer within `timeout`.
    """
    # The REST API exposes items and properties under separate paths.
    entity_type = 'properties' if id.startswith('P') else 'items'

    params = {
        '_fields': props,
    }
    headers = {
        'User-Agent': 'Wikidata Textifier'
    }

    # requests has no default timeout; without one, a stalled connection
    # would block the caller indefinitely.
    response = requests.get(
        f"https://www.wikidata.org/w/rest.php/wikibase/v1/entities/{entity_type}/{id}",
        params=params,
        headers=headers,
        timeout=timeout
    )
    response.raise_for_status()
    return response.json()
44+
45+
1146
def get_wikidata_entities_by_ids(
1247
ids,
1348
props='labels|descriptions|aliases|claims'
@@ -56,6 +91,7 @@ def get_wikidata_entities_by_ids(
5691

5792
return entities_data
5893

94+
5995
def get_all_missing_labels_ids(data):
6096
"""
6197
Get the IDs of the entity dictionary where their labels are missing.

0 commit comments

Comments
 (0)