Merge pull request #160 from vinnyspb/add-statistics-fetch

robmarkcole · web-flow · commit 7ffca39c6a3f · 2025-02-10T12:12:55.000Z
Adds fetch_all_statistics_of method
diff --git a/detective/core.py b/detective/core.py
@@ -156,3 +156,53 @@ def fetch_all_data_of(self, sensors: Tuple[str], limit=50000) -> pd.DataFrame:
         df = pd.read_sql_query(query, con=self.con)
         print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
         return df
+
+    def fetch_all_statistics_of(self, sensors: Tuple[str], limit=50000) -> pd.DataFrame:
+        """
+        Fetch long-term statistics rows for sensors, newest first.
+
+        Arguments:
+        - sensors: Entity IDs; the 'domain:object_id' variant of each is matched too.
+        - limit (default: 50000): Limit the maximum number of statistics rows loaded.
+            If None, there is no limit.
+        """
+        # Statistics imported from an external source are similar to entity_id,
+        # but use a : instead of a . as a delimiter between the domain and object ID.
+        sensor_ids = list(sensors)  # materialise once so any iterable is read a single time
+        statistic_ids = sensor_ids + [sensor.replace('.', ':') for sensor in sensor_ids]
+        # Quote as SQL string literals (embedded ' doubled) instead of relying on Python's repr.
+        sensors_str = "(" + ", ".join("'" + s.replace("'", "''") + "'" for s in statistic_ids) + ")"
+
+        query = f"""
+            WITH combined_states AS (
+                SELECT
+                    statistics.created_ts,
+                    statistics.start_ts,
+                    statistics.last_reset_ts,
+                    statistics.mean,
+                    statistics.max,
+                    statistics.sum,
+                    statistics.state,
+                    statistics_meta.statistic_id,
+                    statistics_meta.source,
+                    statistics_meta.unit_of_measurement,
+                    statistics_meta.has_mean,
+                    statistics_meta.has_sum
+                FROM statistics
+                JOIN statistics_meta
+                ON statistics.metadata_id = statistics_meta.id
+            )
+            SELECT *
+            FROM combined_states
+            WHERE 
+                statistic_id IN {sensors_str}
+            ORDER BY created_ts DESC
+        """
+
+        if limit is not None:
+            query += f"LIMIT {limit}"
+        print(query)
+        query = text(query)
+        df = pd.read_sql_query(query, con=self.con)
+        print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
+        return df
diff --git a/notebooks/Getting started with detective.ipynb b/notebooks/Getting started with detective.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 27,
    "metadata": {
     "collapsed": false,
     "inputHidden": false,
@@ -92,7 +92,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -109,7 +109,7 @@
        " 'zone.home']"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -127,7 +127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -159,7 +159,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -204,7 +204,7 @@
        "0     0     1.680324e+09  zone.home"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -225,7 +225,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -260,7 +260,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -340,7 +340,7 @@
        "5  2023-04-01T05:39:42+00:00     1.680324e+09      sensor.sun_next_dawn"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -358,7 +358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -367,7 +367,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -407,7 +407,7 @@
        "Index: []"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -425,14 +425,110 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "df['day_of_week'] = df['last_updated_ts'].apply(lambda x : x.dayofweek)\n",
     "df['is_temperature'] = df['entity_id'].apply(lambda x : 'temperature' in x)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can fetch [long-term statistics](https://data.home-assistant.io/docs/statistics/) with the separate `fetch_all_statistics_of` method:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "            WITH combined_states AS (\n",
+      "                SELECT\n",
+      "                    statistics.created_ts,\n",
+      "                    statistics.start_ts,\n",
+      "                    statistics.mean,\n",
+      "                    statistics.max,\n",
+      "                    statistics.sum,\n",
+      "                    statistics_meta.statistic_id,\n",
+      "                    statistics_meta.source,\n",
+      "                    statistics_meta.unit_of_measurement,\n",
+      "                    statistics_meta.has_mean,\n",
+      "                    statistics_meta.has_sum\n",
+      "                FROM statistics\n",
+      "                JOIN statistics_meta\n",
+      "                ON statistics.metadata_id = statistics_meta.id\n",
+      "            )\n",
+      "            SELECT *\n",
+      "            FROM combined_states\n",
+      "            WHERE \n",
+      "                statistic_id IN ('sensor.temperature', 'sensor:temperature')\n",
+      "            ORDER BY created_ts DESC\n",
+      "        \n",
+      "The returned Pandas dataframe has 0 rows of data.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>created_ts</th>\n",
+       "      <th>start_ts</th>\n",
+       "      <th>mean</th>\n",
+       "      <th>max</th>\n",
+       "      <th>sum</th>\n",
+       "      <th>statistic_id</th>\n",
+       "      <th>source</th>\n",
+       "      <th>unit_of_measurement</th>\n",
+       "      <th>has_mean</th>\n",
+       "      <th>has_sum</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [created_ts, start_ts, mean, max, sum, statistic_id, source, unit_of_measurement, has_mean, has_sum]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_long_term = db.fetch_all_statistics_of(('sensor.temperature',), limit=None)\n",
+    "df_long_term"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -444,7 +540,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.5 ('venv': venv)",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
@@ -458,15 +554,10 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.12.0"
   },
   "nteract": {
    "version": "0.15.0"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "2af4a7918fba5fbcf89f5d2677b0e673882f4b74674337f98a681302e7f6b461"
-   }
   }
  },
  "nbformat": 4,
diff --git a/tests/test_db.py b/tests/test_db.py
@@ -14,3 +14,6 @@ def test_db():
 
     df = db.fetch_all_sensor_data(limit=100000)
     assert df is not None
+
+    df = db.fetch_all_statistics_of(("sensor.kitchen", "sensor.living_room", "sensor.ac"), limit=100000)
+    assert df is not None