Skip to content

Commit 85a32c1

Browse files
Initial Commit
0 parents  commit 85a32c1

13 files changed

Lines changed: 4430 additions & 0 deletions

.gitignore

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# SQLite Database
2+
*.db
3+
4+
# Byte-compiled / optimized / DLL files
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
9+
# C extensions
10+
*.so
11+
12+
# Distribution / packaging
13+
.Python
14+
build/
15+
develop-eggs/
16+
# dist/
17+
downloads/
18+
eggs/
19+
.eggs/
20+
lib/
21+
lib64/
22+
parts/
23+
sdist/
24+
var/
25+
wheels/
26+
share/python-wheels/
27+
*.egg-info/
28+
.installed.cfg
29+
*.egg
30+
MANIFEST
31+
32+
# PyInstaller
33+
# Usually these files are written by a python script from a template
34+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
35+
*.manifest
36+
*.spec
37+
38+
# Installer logs
39+
pip-log.txt
40+
pip-delete-this-directory.txt
41+
42+
# Unit test / coverage reports
43+
htmlcov/
44+
.tox/
45+
.nox/
46+
.coverage
47+
.coverage.*
48+
.cache
49+
nosetests.xml
50+
coverage.xml
51+
*.cover
52+
*.py,cover
53+
.hypothesis/
54+
.pytest_cache/
55+
cover/
56+
57+
# Translations
58+
*.mo
59+
*.pot
60+
61+
# Django stuff:
62+
*.log
63+
local_settings.py
64+
db.sqlite3
65+
db.sqlite3-journal
66+
*.db
67+
68+
# Flask stuff:
69+
instance/
70+
.webassets-cache
71+
72+
# Scrapy stuff:
73+
.scrapy
74+
75+
# Sphinx documentation
76+
docs/_build/
77+
78+
# PyBuilder
79+
.pybuilder/
80+
target/
81+
82+
# Jupyter Notebook
83+
.ipynb_checkpoints
84+
85+
# IPython
86+
profile_default/
87+
ipython_config.py
88+
89+
# pyenv
90+
# For a library or package, you might want to ignore these files since the code is
91+
# intended to run in multiple environments; otherwise, check them in:
92+
# .python-version
93+
94+
# pipenv
95+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
97+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
98+
# install all needed dependencies.
99+
#Pipfile.lock
100+
101+
# poetry
102+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103+
# This is especially recommended for binary packages to ensure reproducibility, and is more
104+
# commonly ignored for libraries.
105+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106+
#poetry.lock
107+
108+
# pdm
109+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110+
#pdm.lock
111+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112+
# in version control.
113+
# https://pdm.fming.dev/#use-with-ide
114+
.pdm.toml
115+
116+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117+
__pypackages__/
118+
119+
# Celery stuff
120+
celerybeat-schedule
121+
celerybeat.pid
122+
123+
# SageMath parsed files
124+
*.sage.py
125+
126+
# Environments
127+
.env
128+
.venv
129+
env/
130+
venv/
131+
ENV/
132+
env.bak/
133+
venv.bak/
134+
135+
# Spyder project settings
136+
.spyderproject
137+
.spyproject
138+
139+
# Rope project settings
140+
.ropeproject
141+
142+
# mkdocs documentation
143+
/site
144+
145+
# mypy
146+
.mypy_cache/
147+
.dmypy.json
148+
dmypy.json
149+
150+
# Pyre type checker
151+
.pyre/
152+
153+
# pytype static type analyzer
154+
.pytype/
155+
156+
# Cython debug symbols
157+
cython_debug/
158+
159+
# PyCharm
160+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162+
# and can be added to the global gitignore or merged into this file. For a more nuclear
163+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
164+
#.idea/

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13

Dockerfile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
FROM python:3.13-slim-bookworm
2+
COPY --from=ghcr.io/astral-sh/uv:0.7 /uv /uvx /bin/
3+
4+
# Set up the app in the /workspace directory
5+
WORKDIR /workspace
6+
7+
# Install backend dependencies
8+
COPY --chmod=755 pyproject.toml .
9+
COPY --chmod=755 uv.lock .
10+
RUN uv sync
11+
12+
# Copy backend for production
13+
COPY --chmod=755 . .
14+
15+
# Container start script
16+
CMD [ "uv", "run", "main.py" ]

README.md

Whitespace-only changes.

docker-compose.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
services:
2+
wdtextifier:
3+
build: .
4+
container_name: wdtextifier
5+
ports:
6+
- "8000:8000"

main.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from typing import Annotated
2+
import time
3+
import os
4+
import traceback
5+
6+
# Import necessary types and classes from FastAPI and other libraries.
7+
from fastapi import FastAPI, Header, HTTPException, Query, Request
8+
from fastapi.middleware.cors import CORSMiddleware
9+
10+
from src.WikidataTextifier import WikidataEntity
11+
12+
# Create the FastAPI application. The interactive API docs are exposed at
# /docs (Swagger UI) and /redoc (ReDoc).
app = FastAPI(
    title="Wikidata Textifier",
    description="Transforms Wikidata entities into text representations.",
    version="1.0.0",
    docs_url="/docs",  # Swagger UI path
    redoc_url="/redoc",  # ReDoc path
    swagger_ui_parameters={"persistAuthorization": True},
)

# CORS policy: any origin and any request header are accepted, but only for
# GET requests and without credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
    allow_credentials=False,
)
30+
31+
@app.get(
    "/",
    responses={
        200: {
            "description": "Returns the JSON or text representation of the requested Wikidata entity",
            "content": {
                "application/json": {
                    "example": [{
                        "Q42": "Douglas Adams (human), English writer, humorist, and dramatist...",
                    }]
                }
            },
        },
        422: {
            "description": "Missing or invalid query parameter",
            "content": {
                "application/json": {
                    "example": {"detail": "ID is missing"}
                }
            },
        },
    },
)
async def property_query_route(
    request: Request,
    id: str = Query(..., examples=["Q42"]),
    lang: str = 'en',
    json: bool = True,
):
    """
    Retrieve a Wikidata entity as JSON or as plain text suitable for an LLM.

    Args:
        request (Request): The incoming request (currently unused by the handler).
        id (str): The Wikidata entity ID (e.g., "Q42"). Required.
        lang (str): Language code passed to the entity loader. Defaults to 'en'.
        json (bool): If True, returns ``entity.to_json()``; otherwise returns
            ``str(entity)``. Defaults to True.

    Returns:
        The result of ``entity.to_json()`` when ``json`` is True, else the
        string form of the entity.

    Raises:
        HTTPException: 422 if ``id`` is empty; 500 if loading or rendering the
            entity raises any exception.
    """
    # `Query(...)` makes the parameter required, but an empty value (`?id=`)
    # still passes validation, so reject it explicitly.
    if not id:
        raise HTTPException(status_code=422, detail="ID is missing")

    try:
        # NOTE(review): from_id is called synchronously inside an async
        # handler; if it performs blocking I/O it will stall the event loop
        # while it runs -- confirm against WikidataEntity's implementation.
        entity = WikidataEntity.from_id(id, lang=lang)
        return entity.to_json() if json else str(entity)
    except Exception as exc:
        # Log the full traceback server-side, but keep the client-facing
        # error generic.
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Internal Server Error") from exc

pyproject.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[project]
2+
name = "WikidataTextifier"
3+
version = "0.1.0"
4+
description = "Transforms Wikidata entities into text representations."
5+
readme = "README.md"
6+
requires-python = ">=3.13"
7+
dependencies = [
8+
"fastapi>=0.116.1",
9+
"sqlalchemy>=2.0.41",
10+
]

0 commit comments

Comments
 (0)