|
1 | 1 |
|
2 | | -from .WikidataLabel import LazyLabelFactory |
3 | | -from .utils import get_wikidata_entities_by_ids, get_lang_val |
| 2 | +from .WikidataLabel import WikidataLabel, LazyLabelFactory |
| 3 | +from .utils import get_wikidata_entities_by_ids, wikidata_time_to_text, wikidata_geolocation_to_text |
4 | 4 | from datetime import datetime, date |
5 | 5 | from dataclasses import dataclass |
6 | 6 | import re |
@@ -49,159 +49,76 @@ def to_json(self): |
49 | 49 | class WikidataCoordinates: |
50 | 50 | latitude: float | None = None |
51 | 51 | longitude: float | None = None |
| 52 | + string_val: str | None = None |
52 | 53 |
|
53 | 54 | @classmethod |
54 | 55 | def from_raw(cls, value, lazylabel): |
55 | 56 | if not isinstance(value, dict): |
56 | 57 | return cls( |
57 | | - time=None, |
58 | | - precision=None, |
59 | | - calendarmodel=None |
| 58 | + latitude=None, |
| 59 | + longitude=None, |
| 60 | + string_val=None |
60 | 61 | ) |
61 | 62 |
|
| 63 | + string_val = wikidata_geolocation_to_text( |
| 64 | + value.get('latitude'), |
| 65 | + value.get('longitude') |
| 66 | + ) |
| 67 | + |
62 | 68 | return cls( |
63 | 69 | latitude=value.get('latitude'), |
64 | | - longitude=value.get('longitude') |
| 70 | + longitude=value.get('longitude'), |
| 71 | + string_val=string_val |
65 | 72 | ) |
66 | 73 |
|
67 | 74 | def __str__(self): |
68 | | - latitude = abs(self.latitude) |
69 | | - hemi = 'N' if self.latitude >= 0 else 'S' |
70 | | - |
71 | | - degrees = int(latitude) |
72 | | - minutes_full = (latitude - degrees) * 60 |
73 | | - minutes = int(minutes_full) |
74 | | - seconds = (minutes_full - minutes) * 60 |
75 | | - |
76 | | - # Round to-tenth of a second, drop trailing .0 |
77 | | - seconds = round(seconds, 1) |
78 | | - seconds_str = f"{seconds}".rstrip("0").rstrip(".") |
79 | | - |
80 | | - lat_str = f"{degrees}°{minutes}'{seconds_str}\"{hemi}" |
81 | | - |
82 | | - longitude = abs(self.longitude) |
83 | | - hemi = 'E' if self.longitude >= 0 else 'W' |
84 | | - |
85 | | - degrees = int(longitude) |
86 | | - minutes_full = (longitude - degrees) * 60 |
87 | | - minutes = int(minutes_full) |
88 | | - seconds = (minutes_full - minutes) * 60 |
89 | | - |
90 | | - # Round to-tenth of a second, drop trailing .0 |
91 | | - seconds = round(seconds, 1) |
92 | | - seconds_str = f"{seconds}".rstrip("0").rstrip(".") |
93 | | - |
94 | | - lon_str = f"{degrees}°{minutes}'{seconds_str}\"{hemi}" |
95 | | - |
96 | | - return f'{lat_str}, {lon_str}' |
| 75 | + return self.string_val or '' |
97 | 76 |
|
98 | 77 | def to_json(self): |
99 | 78 | return { |
100 | 79 | 'latitude': self.latitude, |
101 | | - 'longitude': self.longitude |
| 80 | + 'longitude': self.longitude, |
| 81 | + 'string': self.string_val |
102 | 82 | } |
103 | 83 |
|
104 | 84 | @dataclass |
105 | 85 | class WikidataTime: |
106 | 86 | time: str | None = None |
107 | 87 | precision: int | None = None |
108 | 88 | calendarmodel: str | None = None |
| 89 | + string_val: str | None = None |
109 | 90 |
|
110 | 91 | @classmethod |
111 | 92 | def from_raw(cls, value, lazylabel): |
112 | 93 | if not isinstance(value, dict): |
113 | 94 | return cls( |
114 | 95 | time=None, |
115 | 96 | precision=None, |
116 | | - calendarmodel=None |
| 97 | + calendarmodel=None, |
| 98 | + string_val=None |
117 | 99 | ) |
118 | 100 |
|
119 | 101 | calendarmodel = value.get('calendarmodel', 'Q1985786') |
120 | 102 | calendarmodel = calendarmodel.split('/')[-1] |
| 103 | + |
| 104 | + string_val = wikidata_time_to_text(value, lazylabel.lang) |
| 105 | + |
121 | 106 | return cls( |
122 | 107 | time=value.get('time'), |
123 | 108 | precision=value.get('precision'), |
124 | | - calendarmodel=calendarmodel |
| 109 | + calendarmodel=calendarmodel, |
| 110 | + string_val=string_val |
125 | 111 | ) |
126 | 112 |
|
127 | 113 | def __str__(self): |
128 | | - # Use regex to parse the time string |
129 | | - pattern = r'([+-])(\d{1,16})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z' |
130 | | - match = re.match(pattern, self.time) |
131 | | - |
132 | | - if not match: |
133 | | - raise ValueError("Malformed time string") |
134 | | - |
135 | | - sign, year_str, month_str, day_str, hour_str, minute_str, second_str = match.groups() |
136 | | - year = int(year_str) * (1 if sign == '+' else -1) |
137 | | - |
138 | | - # Convert Julian to Gregorian if necessary |
139 | | - if 'Q1985786' in self.calendarmodel and year > 1 and len(str(abs(year))) <= 4: # Julian calendar |
140 | | - try: |
141 | | - month = 1 if month_str == '00' else int(month_str) |
142 | | - day = 1 if day_str == '00' else int(day_str) |
143 | | - julian_date = date(year, month, day) |
144 | | - gregorian_ordinal = julian_date.toordinal() + (datetime(1582, 10, 15).toordinal() - datetime(1582, 10, 5).toordinal()) |
145 | | - gregorian_date = date.fromordinal(gregorian_ordinal) |
146 | | - year, month, day = gregorian_date.year, gregorian_date.month, gregorian_date.day |
147 | | - except ValueError: |
148 | | - raise ValueError("Invalid date for Julian calendar") |
149 | | - else: |
150 | | - month = int(month_str) if month_str != '00' else 1 |
151 | | - day = int(day_str) if day_str != '00' else 1 |
152 | | - |
153 | | - # Next step: take translations from Wikidata Labels |
154 | | - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] |
155 | | - month_str = months[month - 1] if month != 0 else '' |
156 | | - era = 'AD' if year > 0 else 'BC' |
157 | | - |
158 | | - if self.precision == 14: |
159 | | - return f"{year} {month_str} {day} {hour_str}:{minute_str}:{second_str}" |
160 | | - elif self.precision == 13: |
161 | | - return f"{year} {month_str} {day} {hour_str}:{minute_str}" |
162 | | - elif self.precision == 12: |
163 | | - return f"{year} {month_str} {day} {hour_str}:00" |
164 | | - elif self.precision == 11: |
165 | | - return f"{day} {month_str} {year}" |
166 | | - elif self.precision == 10: |
167 | | - return f"{month_str} {year}" |
168 | | - elif self.precision == 9: |
169 | | - return f"{abs(year)} {era}" |
170 | | - elif self.precision == 8: |
171 | | - decade = (year // 10) * 10 |
172 | | - return f"{abs(decade)}s {era}" |
173 | | - elif self.precision == 7: |
174 | | - century = (abs(year) - 1) // 100 + 1 |
175 | | - return f"{century}th century {era}" |
176 | | - elif self.precision == 6: |
177 | | - millennium = (abs(year) - 1) // 1000 + 1 |
178 | | - return f"{millennium}th millennium {era}" |
179 | | - elif self.precision == 5: |
180 | | - tens_of_thousands = abs(year) // 10000 |
181 | | - return f"{tens_of_thousands} ten thousand years {era}" |
182 | | - elif self.precision == 4: |
183 | | - hundreds_of_thousands = abs(year) // 100000 |
184 | | - return f"{hundreds_of_thousands} hundred thousand years {era}" |
185 | | - elif self.precision == 3: |
186 | | - millions = abs(year) // 1000000 |
187 | | - return f"{millions} million years {era}" |
188 | | - elif self.precision == 2: |
189 | | - tens_of_millions = abs(year) // 10000000 |
190 | | - return f"{tens_of_millions} tens of millions of years {era}" |
191 | | - elif self.precision == 1: |
192 | | - hundreds_of_millions = abs(year) // 100000000 |
193 | | - return f"{hundreds_of_millions} hundred million years {era}" |
194 | | - elif self.precision == 0: |
195 | | - billions = abs(year) // 1000000000 |
196 | | - return f"{billions} billion years {era}" |
197 | | - else: |
198 | | - raise ValueError(f"Unknown precision value {self.precision}") |
| 114 | + return self.string_val or '' |
199 | 115 |
|
200 | 116 | def to_json(self): |
201 | 117 | return { |
202 | 118 | 'time': self.time, |
203 | 119 | 'precision': self.precision, |
204 | | - 'calendar_QID': self.calendarmodel |
| 120 | + 'calendar_QID': self.calendarmodel, |
| 121 | + 'string': self.string_val |
205 | 122 | } |
206 | 123 |
|
207 | 124 | @dataclass |
@@ -493,8 +410,8 @@ def from_id(cls, id: str, lang: str = 'en', external_ids: bool = True): |
493 | 410 | if 'labels' not in entity_dict: |
494 | 411 | return None |
495 | 412 |
|
496 | | - label = get_lang_val(entity_dict['labels'], lang) |
497 | | - description = get_lang_val(entity_dict['descriptions'], lang) |
| 413 | + label = WikidataLabel.get_lang_val(entity_dict['labels'], lang) |
| 414 | + description = WikidataLabel.get_lang_val(entity_dict['descriptions'], lang) |
498 | 415 |
|
499 | 416 | aliases = entity_dict['aliases'].get(lang, []) + \ |
500 | 417 | entity_dict['aliases'].get('mul', []) |
|
0 commit comments