|
1 | | -from .WikidataLabel import WikidataLabel |
2 | 1 | import requests |
3 | | -from datetime import datetime, date |
4 | 2 |
|
5 | 3 | def get_wikidata_entities_by_ids( |
6 | 4 | ids, |
@@ -81,171 +79,6 @@ def get_all_missing_labels_ids(data): |
81 | 79 |
|
82 | 80 | return ids_list |
83 | 81 |
|
def time_to_text(time_data, lang='en'):
    """
    Converts Wikidata time data into a human-readable string.

    Parameters:
    - time_data (dict): A dictionary containing the 'time' string
      (e.g. "+2020-05-17T00:00:00Z"), the integer 'precision' (0-14) and
      optionally the 'calendarmodel' URI.
    - lang (str): The language code for the output (currently not supported).

    Returns:
    - str: A textual representation of the time with appropriate granularity,
      or None when time_data is None.

    Raises:
    - ValueError: If the time string is malformed, the Julian date is invalid,
      or the precision value is unknown.
    """
    # Imported locally so the function does not rely on a module-level
    # `import re` being present.
    import re

    if time_data is None:
        return None

    time_value = time_data['time']
    precision = time_data['precision']
    # NOTE(review): a missing calendar model falls back to the Julian entity
    # (Q1985786), as in the previous implementation - confirm this is intended.
    calendarmodel = time_data.get('calendarmodel', 'http://www.wikidata.org/entity/Q1985786')

    pattern = r'([+-])(\d{1,16})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z'
    match = re.match(pattern, time_value)
    if not match:
        raise ValueError("Malformed time string")

    sign, year_str, month_str, day_str, hour_str, minute_str, second_str = match.groups()
    year = int(year_str) * (1 if sign == '+' else -1)
    # '00' marks an unspecified month/day; fall back to 1 so formatting works.
    month = int(month_str) or 1
    day = int(day_str) or 1
    if month > 12:
        raise ValueError("Malformed time string")

    # Convert Julian to Gregorian only when the value is precise to the day:
    # for coarser precisions the day is a placeholder and shifting it could
    # move the value into a different month or year.
    is_julian = 'Q1985786' in calendarmodel
    if is_julian and precision in (11, 12, 13, 14) and 1 < year <= 9999:
        julian_month_lengths = [31, 29 if year % 4 == 0 else 28, 31, 30, 31, 30,
                                31, 31, 30, 31, 30, 31]
        if day > julian_month_lengths[month - 1]:
            raise ValueError("Invalid date for Julian calendar")
        year, month, day = _julian_to_gregorian(year, month, day)

    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    month_name = months[month - 1]
    era = 'AD' if year > 0 else 'BC'

    if precision == 14:
        return f"{year} {month_name} {day} {hour_str}:{minute_str}:{second_str}"
    if precision == 13:
        return f"{year} {month_name} {day} {hour_str}:{minute_str}"
    if precision == 12:
        return f"{year} {month_name} {day} {hour_str}:00"
    if precision == 11:
        return f"{day} {month_name} {year}"
    if precision == 10:
        return f"{month_name} {year}"
    if precision == 9:
        return f"{abs(year)} {era}"
    if precision == 8:
        # Work on the absolute year so BC decades are not floored away from
        # zero (-55 must give "50s BC", not "60s BC").
        decade = (abs(year) // 10) * 10
        return f"{decade}s {era}"
    if precision == 7:
        century = (abs(year) - 1) // 100 + 1
        return f"{_ordinal(century)} century {era}"
    if precision == 6:
        millennium = (abs(year) - 1) // 1000 + 1
        return f"{_ordinal(millennium)} millennium {era}"

    # Precisions 5..0 express the year as a count of ever larger units.
    large_scales = {
        5: (10000, 'ten thousand years'),
        4: (100000, 'hundred thousand years'),
        3: (1000000, 'million years'),
        2: (10000000, 'tens of millions of years'),
        1: (100000000, 'hundred million years'),
        0: (1000000000, 'billion years'),
    }
    if precision in large_scales:
        divisor, unit_text = large_scales[precision]
        return f"{abs(year) // divisor} {unit_text} {era}"

    raise ValueError(f"Unknown precision value {precision}")


def _julian_to_gregorian(year, month, day):
    """Convert a Julian calendar date to the proleptic Gregorian calendar via its Julian Day Number."""
    a = (14 - month) // 12
    y = year + 4800 - a
    m = month + 12 * a - 3
    jdn = day + (153 * m + 2) // 5 + 365 * y + y // 4 - 32083

    a = jdn + 32044
    b = (4 * a + 3) // 146097
    c = a - (146097 * b) // 4
    d = (4 * c + 3) // 1461
    e = c - (1461 * d) // 4
    m = (5 * e + 2) // 153
    g_day = e - (153 * m + 2) // 5 + 1
    g_month = m + 3 - 12 * (m // 10)
    g_year = 100 * b + d - 4800 + m // 10
    return g_year, g_month, g_day


def _ordinal(number):
    """Return the number with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, ...)."""
    if 10 <= number % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return f"{number}{suffix}"
172 | | - |
173 | | - |
def quantity_to_text(quantity_data, labels=None, lang='en'):
    """
    Converts Wikidata quantity data into a human-readable string.

    Parameters:
    - quantity_data (dict): A dictionary with 'amount' and optionally 'unit' (often a QID URI).
    - labels (dict): Optional dictionary mapping QIDs to their labels, previously fetched.
      Presumably each value has the shape accepted by get_lang_val - verify against callers.
    - lang (str): The language code for the output.

    Returns:
    - str: A textual representation of the quantity (the amount, followed by the unit
      label when one is available), or None when quantity_data or its amount is missing.
    """
    if quantity_data is None:
        return None

    quantity = quantity_data.get('amount')
    if quantity is None:
        return None
    quantity = str(quantity)

    unit = quantity_data.get('unit')

    # 'unit' of '1' means that the value is a count and doesn't require a unit.
    # A missing unit is treated the same way instead of crashing on None.
    if not unit or unit == '1':
        return quantity

    # The unit is usually an entity URI; its last path segment is the QID.
    unit_qid = unit.rsplit('/', 1)[-1]
    if labels and unit_qid in labels:
        unit_labels = labels[unit_qid]
    else:
        # Not pre-fetched: look the label up on demand.
        unit_labels = WikidataLabel.get_labels(unit_qid)
    unit_text = get_lang_val(unit_labels, lang=lang)

    return quantity + (f" {unit_text}" if unit_text else "")
204 | | - |
205 | | - |
def globalcoordinate_to_text(coor_data, lang='en'):
    """
    Converts Wikidata global-coordinate data into a degrees/minutes/seconds string.

    Parameters:
    - coor_data (dict): A dictionary with 'latitude' and 'longitude' keys holding
      decimal degrees.
    - lang (str): The language code for the output (currently not supported).

    Returns:
    - str: The coordinates in DMS format as "<latitude>, <longitude>", for example
      40°30'0"N, 74°15'0"W. Returns None when coor_data is None.
    """
    if coor_data is None:
        return None

    lat_str = _decimal_to_dms(coor_data['latitude'], ('N', 'S'))
    lon_str = _decimal_to_dms(coor_data['longitude'], ('E', 'W'))

    return f'{lat_str}, {lon_str}'


def _decimal_to_dms(value, hemi_pair):
    """
    Convert a single decimal degree value to DMS with hemisphere suffix.
    `hemi_pair` is ("N", "S") for latitude or ("E", "W") for longitude.
    """
    hemi = hemi_pair[0] if value >= 0 else hemi_pair[1]

    # Round once, on the whole value expressed in tenths of a second, and only
    # then split into degrees/minutes/seconds. Rounding the seconds after the
    # minutes were truncated could yield 60" (e.g. 10.1 degrees -> 5'60").
    total_tenths = round(abs(value) * 36000)
    degrees, remainder = divmod(total_tenths, 36000)
    minutes, tenths = divmod(remainder, 600)
    whole_seconds, fraction = divmod(tenths, 10)

    # Show the tenth of a second only when it is non-zero.
    seconds_str = f"{whole_seconds}.{fraction}" if fraction else f"{whole_seconds}"

    return f"{degrees}°{minutes}'{seconds_str}\"{hemi}"
248 | | - |
249 | 82 | def get_lang_val(data, lang='en'): |
250 | 83 | """ |
251 | 84 | Extracts the value for a given language from a dictionary of labels. |
|
0 commit comments