Skip to content

Commit 7ffca39

Browse files
authored
Merge pull request #160 from vinnyspb/add-statistics-fetch
Adds fetch_all_statistics_of method
2 parents 162b0d5 + a2b1eb6 commit 7ffca39

File tree

3 files changed

+167
-23
lines changed

3 files changed

+167
-23
lines changed

detective/core.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,53 @@ def fetch_all_data_of(self, sensors: Tuple[str], limit=50000) -> pd.DataFrame:
156156
df = pd.read_sql_query(query, con=self.con)
157157
print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
158158
return df
159+
160+
def fetch_all_statistics_of(self, sensors: Tuple[str], limit=50000) -> pd.DataFrame:
    """
    Fetch long-term statistics rows for the given sensors.

    Every requested ID is matched both as given and with each "." replaced
    by ":", because statistics imported from an external source use ":"
    instead of "." as the delimiter between the domain and the object ID.

    Arguments:
    - sensors: The statistic/entity IDs to fetch, e.g. ("sensor.kitchen",).
        A single string is treated as one ID, not as a sequence of
        characters. An empty collection yields an empty dataframe.
    - limit (default: 50000): Limit the maximum number of statistics rows loaded.
        If None, there is no limit.

    Returns:
    A dataframe of `statistics` rows joined with their `statistics_meta`
    row, ordered by `created_ts` descending.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(sensors, str):
        sensors = (sensors,)

    # Statistics imported from an external source are similar to entity_id,
    # but use a : instead of a . as a delimiter between the domain and object ID.
    sensors_with_colons = [sensor.replace(".", ":") for sensor in sensors]

    # dict.fromkeys drops duplicates while keeping order (an ID without a
    # "." maps to itself, so it would otherwise be listed twice).
    statistic_ids = list(dict.fromkeys(list(sensors) + sensors_with_colons))

    if statistic_ids:
        # Render each ID as a SQL string literal, doubling embedded single
        # quotes. Python's repr() is not SQL quoting: it switches to double
        # quotes for such values, which SQL reads as an identifier.
        quoted_ids = ["'" + sid.replace("'", "''") + "'" for sid in statistic_ids]
        sensors_str = "(" + ", ".join(quoted_ids) + ")"
    else:
        # "IN ()" is a syntax error on several databases; "IN (NULL)" is
        # valid everywhere and matches no rows.
        sensors_str = "(NULL)"

    query = f"""
        WITH combined_states AS (
            SELECT
                statistics.created_ts,
                statistics.start_ts,
                statistics.last_reset_ts,
                statistics.mean,
                statistics.max,
                statistics.sum,
                statistics.state,
                statistics_meta.statistic_id,
                statistics_meta.source,
                statistics_meta.unit_of_measurement,
                statistics_meta.has_mean,
                statistics_meta.has_sum
            FROM statistics
            JOIN statistics_meta
            ON statistics.metadata_id = statistics_meta.id
        )
        SELECT *
        FROM combined_states
        WHERE
            statistic_id IN {sensors_str}
        ORDER BY created_ts DESC
    """

    if limit is not None:
        # int() guarantees that only a number is ever spliced into the SQL.
        query += f"LIMIT {int(limit)}"
    print(query)
    statement = text(query)
    df = pd.read_sql_query(statement, con=self.con)
    print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
    return df

notebooks/Getting started with detective.ipynb

Lines changed: 114 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 13,
12+
"execution_count": 24,
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 14,
31+
"execution_count": null,
3232
"metadata": {},
3333
"outputs": [],
3434
"source": [
@@ -37,7 +37,7 @@
3737
},
3838
{
3939
"cell_type": "code",
40-
"execution_count": 15,
40+
"execution_count": 26,
4141
"metadata": {},
4242
"outputs": [
4343
{
@@ -69,7 +69,7 @@
6969
},
7070
{
7171
"cell_type": "code",
72-
"execution_count": 16,
72+
"execution_count": 27,
7373
"metadata": {
7474
"collapsed": false,
7575
"inputHidden": false,
@@ -92,7 +92,7 @@
9292
},
9393
{
9494
"cell_type": "code",
95-
"execution_count": 17,
95+
"execution_count": 28,
9696
"metadata": {},
9797
"outputs": [
9898
{
@@ -109,7 +109,7 @@
109109
" 'zone.home']"
110110
]
111111
},
112-
"execution_count": 17,
112+
"execution_count": 28,
113113
"metadata": {},
114114
"output_type": "execute_result"
115115
}
@@ -127,7 +127,7 @@
127127
},
128128
{
129129
"cell_type": "code",
130-
"execution_count": 18,
130+
"execution_count": 29,
131131
"metadata": {},
132132
"outputs": [
133133
{
@@ -159,7 +159,7 @@
159159
},
160160
{
161161
"cell_type": "code",
162-
"execution_count": 19,
162+
"execution_count": 30,
163163
"metadata": {},
164164
"outputs": [
165165
{
@@ -204,7 +204,7 @@
204204
"0 0 1.680324e+09 zone.home"
205205
]
206206
},
207-
"execution_count": 19,
207+
"execution_count": 30,
208208
"metadata": {},
209209
"output_type": "execute_result"
210210
}
@@ -225,7 +225,7 @@
225225
},
226226
{
227227
"cell_type": "code",
228-
"execution_count": 20,
228+
"execution_count": 31,
229229
"metadata": {},
230230
"outputs": [
231231
{
@@ -260,7 +260,7 @@
260260
},
261261
{
262262
"cell_type": "code",
263-
"execution_count": 21,
263+
"execution_count": 32,
264264
"metadata": {},
265265
"outputs": [
266266
{
@@ -340,7 +340,7 @@
340340
"5 2023-04-01T05:39:42+00:00 1.680324e+09 sensor.sun_next_dawn"
341341
]
342342
},
343-
"execution_count": 21,
343+
"execution_count": 32,
344344
"metadata": {},
345345
"output_type": "execute_result"
346346
}
@@ -358,7 +358,7 @@
358358
},
359359
{
360360
"cell_type": "code",
361-
"execution_count": 22,
361+
"execution_count": 33,
362362
"metadata": {},
363363
"outputs": [],
364364
"source": [
@@ -367,7 +367,7 @@
367367
},
368368
{
369369
"cell_type": "code",
370-
"execution_count": 23,
370+
"execution_count": 34,
371371
"metadata": {},
372372
"outputs": [
373373
{
@@ -407,7 +407,7 @@
407407
"Index: []"
408408
]
409409
},
410-
"execution_count": 23,
410+
"execution_count": 34,
411411
"metadata": {},
412412
"output_type": "execute_result"
413413
}
@@ -425,14 +425,110 @@
425425
},
426426
{
427427
"cell_type": "code",
428-
"execution_count": 24,
428+
"execution_count": 35,
429429
"metadata": {},
430430
"outputs": [],
431431
"source": [
432432
"df['day_of_week'] = df['last_updated_ts'].apply(lambda x : x.dayofweek)\n",
433433
"df['is_temperature'] = df['entity_id'].apply(lambda x : 'temperature' in x)"
434434
]
435435
},
436+
{
437+
"cell_type": "markdown",
438+
"metadata": {},
439+
"source": [
440+
"You can fetch [long-term statistics](https://data.home-assistant.io/docs/statistics/) using a separate function"
441+
]
442+
},
443+
{
444+
"cell_type": "code",
445+
"execution_count": 36,
446+
"metadata": {},
447+
"outputs": [
448+
{
449+
"name": "stdout",
450+
"output_type": "stream",
451+
"text": [
452+
"\n",
453+
" WITH combined_states AS (\n",
454+
" SELECT\n",
455+
" statistics.created_ts,\n",
456+
" statistics.start_ts,\n",
457+
" statistics.mean,\n",
458+
" statistics.max,\n",
459+
" statistics.sum,\n",
460+
" statistics_meta.statistic_id,\n",
461+
" statistics_meta.source,\n",
462+
" statistics_meta.unit_of_measurement,\n",
463+
" statistics_meta.has_mean,\n",
464+
" statistics_meta.has_sum\n",
465+
" FROM statistics\n",
466+
" JOIN statistics_meta\n",
467+
" ON statistics.metadata_id = statistics_meta.id\n",
468+
" )\n",
469+
" SELECT *\n",
470+
" FROM combined_states\n",
471+
" WHERE \n",
472+
" statistic_id IN ('sensor.temperature', 'sensor:temperature')\n",
473+
" ORDER BY created_ts DESC\n",
474+
" \n",
475+
"The returned Pandas dataframe has 0 rows of data.\n"
476+
]
477+
},
478+
{
479+
"data": {
480+
"text/html": [
481+
"<div>\n",
482+
"<style scoped>\n",
483+
" .dataframe tbody tr th:only-of-type {\n",
484+
" vertical-align: middle;\n",
485+
" }\n",
486+
"\n",
487+
" .dataframe tbody tr th {\n",
488+
" vertical-align: top;\n",
489+
" }\n",
490+
"\n",
491+
" .dataframe thead th {\n",
492+
" text-align: right;\n",
493+
" }\n",
494+
"</style>\n",
495+
"<table border=\"1\" class=\"dataframe\">\n",
496+
" <thead>\n",
497+
" <tr style=\"text-align: right;\">\n",
498+
" <th></th>\n",
499+
" <th>created_ts</th>\n",
500+
" <th>start_ts</th>\n",
501+
" <th>mean</th>\n",
502+
" <th>max</th>\n",
503+
" <th>sum</th>\n",
504+
" <th>statistic_id</th>\n",
505+
" <th>source</th>\n",
506+
" <th>unit_of_measurement</th>\n",
507+
" <th>has_mean</th>\n",
508+
" <th>has_sum</th>\n",
509+
" </tr>\n",
510+
" </thead>\n",
511+
" <tbody>\n",
512+
" </tbody>\n",
513+
"</table>\n",
514+
"</div>"
515+
],
516+
"text/plain": [
517+
"Empty DataFrame\n",
518+
"Columns: [created_ts, start_ts, mean, max, sum, statistic_id, source, unit_of_measurement, has_mean, has_sum]\n",
519+
"Index: []"
520+
]
521+
},
522+
"execution_count": 36,
523+
"metadata": {},
524+
"output_type": "execute_result"
525+
}
526+
],
527+
"source": [
528+
"df_long_term = db.fetch_all_statistics_of(('sensor.temperature',), limit=None)\n",
529+
"df_long_term"
530+
]
531+
},
436532
{
437533
"attachments": {},
438534
"cell_type": "markdown",
@@ -444,7 +540,7 @@
444540
],
445541
"metadata": {
446542
"kernelspec": {
447-
"display_name": "Python 3.8.5 ('venv': venv)",
543+
"display_name": ".venv",
448544
"language": "python",
449545
"name": "python3"
450546
},
@@ -458,15 +554,10 @@
458554
"name": "python",
459555
"nbconvert_exporter": "python",
460556
"pygments_lexer": "ipython3",
461-
"version": "3.8.5"
557+
"version": "3.12.0"
462558
},
463559
"nteract": {
464560
"version": "0.15.0"
465-
},
466-
"vscode": {
467-
"interpreter": {
468-
"hash": "2af4a7918fba5fbcf89f5d2677b0e673882f4b74674337f98a681302e7f6b461"
469-
}
470561
}
471562
},
472563
"nbformat": 4,

tests/test_db.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,6 @@ def test_db():
1414

1515
df = db.fetch_all_sensor_data(limit=100000)
1616
assert df is not None
17+
18+
df = db.fetch_all_statistics_of(("sensor.kitchen", "sensor.living_room", "sensor.ac"), limit=100000)
19+
assert df is not None

0 commit comments

Comments
 (0)