Skip to content

Commit 4eac53f

Browse files
Add fastAPI and gunicorn
1 parent 85a32c1 commit 4eac53f

9 files changed

Lines changed: 146 additions & 3087 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,4 @@ RUN uv sync
1313
COPY --chmod=755 . .
1414

1515
# Container start script
16-
CMD [ "uv", "run", "main.py" ]
16+
CMD ["uv", "run", "gunicorn", "main:app", "-k", "uvicorn.workers.UvicornWorker", "-w", "4", "-b", "0.0.0.0:8000"]

docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
services:
22
wdtextifier:
33
build: .
4+
volumes:
5+
- ./data:/workspace/data
46
container_name: wdtextifier
57
ports:
68
- "8000:8000"

main.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,6 @@
1-
from typing import Annotated
2-
import time
3-
import os
4-
import traceback
5-
6-
# Import necessary types and classes from FastAPI and other libraries.
71
from fastapi import FastAPI, Header, HTTPException, Query, Request
82
from fastapi.middleware.cors import CORSMiddleware
3+
import traceback
94

105
from src.WikidataTextifier import WikidataEntity
116

@@ -53,27 +48,31 @@
5348
)
5449
async def property_query_route(
5550
request: Request,
56-
id: str = Query(..., example="Q42"),
51+
id: str = Query(..., examples="Q42"),
5752
lang: str = 'en',
58-
json: bool = True,
53+
json: bool = False,
5954
):
6055
"""
6156
Retrieve a Wikidata item with all labels or textual representations for an LLM.
6257
6358
Args:
6459
id (str): The Wikidata item ID (e.g., "Q42").
65-
json (bool): If True, returns the item in JSON format. Defaults to True.
60+
json (bool): If True, returns the item in JSON format.
6661
6762
Returns:
6863
list: A list of dictionaries containing QIDs and the similarity scores.
6964
"""
7065
if not id:
7166
response = "ID is missing"
72-
raise HTTPException(status_code=422, detail=response)
67+
return HTTPException(status_code=422, detail=response)
7368

7469
try:
7570
entity = WikidataEntity.from_id(id, lang=lang)
7671

72+
if not entity:
73+
response = "Item not found"
74+
return HTTPException(status_code=404, detail=response)
75+
7776
if json:
7877
results = entity.to_json()
7978
else:

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,8 @@ readme = "README.md"
66
requires-python = ">=3.13"
77
dependencies = [
88
"fastapi>=0.116.1",
9+
"gunicorn>=23.0.0",
10+
"requests>=2.32.4",
911
"sqlalchemy>=2.0.41",
12+
"uvicorn>=0.35.0",
1013
]

src/WikidataLabel.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
"""
1111
SQLite database setup for storing Wikidata labels in all languages.
1212
"""
13-
TOOL_DATA_DIR = os.environ.get("TOOL_DATA_DIR", "./")
13+
TOOL_DATA_DIR = os.environ.get("TOOL_DATA_DIR", "./data")
1414
DATABASE_URL = os.path.join(TOOL_DATA_DIR, 'sqlite_wikidata_labels.db')
1515

1616
engine = create_engine(f'sqlite:///{DATABASE_URL}',

src/WikidataTextifier.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11

2-
from .WikidataLabel import WikidataLabel, LazyLabelFactory
3-
from .utils import get_wikidata_entities_by_ids, get_all_missing_labels_ids, get_lang_val, time_to_text, quantity_to_text
2+
from .WikidataLabel import LazyLabelFactory
3+
from .utils import get_wikidata_entities_by_ids, get_lang_val
44
from datetime import datetime, date
55
from dataclasses import dataclass
66
import re
@@ -435,6 +435,9 @@ def from_id(cls, id: str, lang: str = 'en'):
435435
raise ValueError(f"ID not found.")
436436

437437
entity_dict = entity_dict[id]
438+
if 'labels' not in entity_dict:
439+
return None
440+
438441
label = get_lang_val(entity_dict['labels'], lang)
439442
description = get_lang_val(entity_dict['descriptions'], lang)
440443

src/utils.py

Lines changed: 0 additions & 167 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
from .WikidataLabel import WikidataLabel
21
import requests
3-
from datetime import datetime, date
42

53
def get_wikidata_entities_by_ids(
64
ids,
@@ -81,171 +79,6 @@ def get_all_missing_labels_ids(data):
8179

8280
return ids_list
8381

84-
def time_to_text(time_data, lang='en'):
85-
"""
86-
Converts Wikidata time data into a human-readable string.
87-
88-
Parameters:
89-
- time_data (dict): A dictionary containing the time string, precision, and calendar model.
90-
- lang (str): The language code for the output (currently not supported).
91-
92-
Returns:
93-
- str: A textual representation of the time with appropriate granularity.
94-
"""
95-
if time_data is None:
96-
return None
97-
98-
time_value = time_data['time']
99-
precision = time_data['precision']
100-
calendarmodel = time_data.get('calendarmodel', 'http://www.wikidata.org/entity/Q1985786')
101-
102-
# Use regex to parse the time string
103-
pattern = r'([+-])(\d{1,16})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z'
104-
match = re.match(pattern, time_value)
105-
106-
if not match:
107-
raise ValueError("Malformed time string")
108-
109-
sign, year_str, month_str, day_str, hour_str, minute_str, second_str = match.groups()
110-
year = int(year_str) * (1 if sign == '+' else -1)
111-
112-
# Convert Julian to Gregorian if necessary
113-
if 'Q1985786' in calendarmodel and year > 1 and len(str(abs(year))) <= 4: # Julian calendar
114-
try:
115-
month = 1 if month_str == '00' else int(month_str)
116-
day = 1 if day_str == '00' else int(day_str)
117-
julian_date = date(year, month, day)
118-
gregorian_ordinal = julian_date.toordinal() + (datetime(1582, 10, 15).toordinal() - datetime(1582, 10, 5).toordinal())
119-
gregorian_date = date.fromordinal(gregorian_ordinal)
120-
year, month, day = gregorian_date.year, gregorian_date.month, gregorian_date.day
121-
except ValueError:
122-
raise ValueError("Invalid date for Julian calendar")
123-
else:
124-
month = int(month_str) if month_str != '00' else 1
125-
day = int(day_str) if day_str != '00' else 1
126-
127-
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
128-
month_str = months[month - 1] if month != 0 else ''
129-
era = 'AD' if year > 0 else 'BC'
130-
131-
if precision == 14:
132-
return f"{year} {month_str} {day} {hour_str}:{minute_str}:{second_str}"
133-
elif precision == 13:
134-
return f"{year} {month_str} {day} {hour_str}:{minute_str}"
135-
elif precision == 12:
136-
return f"{year} {month_str} {day} {hour_str}:00"
137-
elif precision == 11:
138-
return f"{day} {month_str} {year}"
139-
elif precision == 10:
140-
return f"{month_str} {year}"
141-
elif precision == 9:
142-
return f"{abs(year)} {era}"
143-
elif precision == 8:
144-
decade = (year // 10) * 10
145-
return f"{abs(decade)}s {era}"
146-
elif precision == 7:
147-
century = (abs(year) - 1) // 100 + 1
148-
return f"{century}th century {era}"
149-
elif precision == 6:
150-
millennium = (abs(year) - 1) // 1000 + 1
151-
return f"{millennium}th millennium {era}"
152-
elif precision == 5:
153-
tens_of_thousands = abs(year) // 10000
154-
return f"{tens_of_thousands} ten thousand years {era}"
155-
elif precision == 4:
156-
hundreds_of_thousands = abs(year) // 100000
157-
return f"{hundreds_of_thousands} hundred thousand years {era}"
158-
elif precision == 3:
159-
millions = abs(year) // 1000000
160-
return f"{millions} million years {era}"
161-
elif precision == 2:
162-
tens_of_millions = abs(year) // 10000000
163-
return f"{tens_of_millions} tens of millions of years {era}"
164-
elif precision == 1:
165-
hundreds_of_millions = abs(year) // 100000000
166-
return f"{hundreds_of_millions} hundred million years {era}"
167-
elif precision == 0:
168-
billions = abs(year) // 1000000000
169-
return f"{billions} billion years {era}"
170-
else:
171-
raise ValueError(f"Unknown precision value {precision}")
172-
173-
174-
def quantity_to_text(quantity_data, labels={}, lang='en'):
175-
"""
176-
Converts Wikidata quantity data into a human-readable string.
177-
178-
Parameters:
179-
- quantity_data (dict): A dictionary with 'amount' and optionally 'unit' (often a QID).
180-
- labels (dict): A dictionary mapping QIDs to their labels, previously fetched.
181-
- lang (str): The language code for the output.
182-
183-
Returns:
184-
- str: A textual representation of the quantity (e.g., "5 kg").
185-
"""
186-
if quantity_data is None:
187-
return None
188-
189-
quantity = quantity_data.get('amount')
190-
unit = quantity_data.get('unit')
191-
192-
# 'unit' of '1' means that the value is a count and doesn't require a unit.
193-
if unit == '1':
194-
unit = None
195-
else:
196-
unit_qid = unit.rsplit('/')[-1]
197-
if unit_qid in labels:
198-
unit = labels[unit_qid]
199-
else:
200-
unit = WikidataLabel.get_labels(unit_qid)
201-
unit = get_lang_val(unit, lang=lang)
202-
203-
return quantity + (f" {unit}" if unit else "")
204-
205-
206-
def globalcoordinate_to_text(coor_data, lang='en'):
207-
"""
208-
Convert a single decimal degree value to DMS with hemisphere suffix.
209-
`hemi_pair` is ("N", "S") for latitude or ("E", "W") for longitude.
210-
211-
Parameters:
212-
- coor_data (dict): A dictionary with 'latitude' and 'longitude' keys.
213-
- lang (str): The language code for the output (currently not supported).
214-
215-
Returns:
216-
- str: A string representation of the coordinates in DMS format.
217-
"""
218-
219-
latitude = abs(coor_data['latitude'])
220-
hemi = 'N' if coor_data['latitude'] >= 0 else 'S'
221-
222-
degrees = int(latitude)
223-
minutes_full = (latitude - degrees) * 60
224-
minutes = int(minutes_full)
225-
seconds = (minutes_full - minutes) * 60
226-
227-
# Round to-tenth of a second, drop trailing .0
228-
seconds = round(seconds, 1)
229-
seconds_str = f"{seconds}".rstrip("0").rstrip(".")
230-
231-
lat_str = f"{degrees}°{minutes}'{seconds_str}\"{hemi}"
232-
233-
longitude = abs(coor_data['longitude'])
234-
hemi = 'E' if coor_data['longitude'] >= 0 else 'W'
235-
236-
degrees = int(longitude)
237-
minutes_full = (longitude - degrees) * 60
238-
minutes = int(minutes_full)
239-
seconds = (minutes_full - minutes) * 60
240-
241-
# Round to-tenth of a second, drop trailing .0
242-
seconds = round(seconds, 1)
243-
seconds_str = f"{seconds}".rstrip("0").rstrip(".")
244-
245-
lon_str = f"{degrees}°{minutes}'{seconds_str}\"{hemi}"
246-
247-
return f'{lat_str}, {lon_str}'
248-
24982
def get_lang_val(data, lang='en'):
25083
"""
25184
Extracts the value for a given language from a dictionary of labels.

0 commit comments

Comments
 (0)