
Commit 06a56ee

feat: add Docker-based Spark integration test setup (#946)
* feat: add Docker-based Spark integration test setup
  - Add docker-compose-spark.yml with Spark Thrift Server + Hive Metastore
  - Add Docker config files (Dockerfile, entrypoint, hive-site.xml, spark-defaults.conf)
  - Add Spark profile to profiles.yml.j2 (thrift method on port 10000)
  - Add 'Start Spark' step in test-warehouse.yml CI workflow
  - Add Spark to Docker targets matrix in test-all-warehouses.yml
  - Fix python-dev -> python3-dev for Ubuntu 22.04+ compatibility

* fix: handle Spark database=Undefined in test source configs and address review feedback
  - Fix database=Undefined error for the Spark target by returning None from get_database_and_schema_properties() and omitting the database field from YAML source configs when it is None
  - Upgrade the Hive Metastore from postgres:9-alpine to postgres:15-alpine
  - Use HTTPS for the Spark download in the Dockerfile
  - Add a bounded timeout to the entrypoint.sh wait loop (180s default)

* fix: add Spark-specific macros for database/schema resolution and string escaping
  - Add spark__get_package_database_and_schema to handle Spark's lack of a catalog/database, ensuring is_elementary_enabled() returns True
  - Add spark__escape_special_chars to replace newlines with spaces instead of \n, which Spark's SQL parser treats as literal line breaks in INSERT VALUES statements, causing row corruption

* fix: add Delta Lake support, Spark dispatches for schema/artifact operations, and escape fixes
  - Add Delta Lake JARs to the Spark Docker image for v2 table support (DELETE/MERGE)
  - Configure file_format=delta for Spark targets in dbt_project.yml and profiles
  - Add spark__get_default_incremental_strategy returning 'merge' for Delta tables
  - Fix spark__get_delete_and_insert_queries to always use MERGE (Delta doesn't support DELETE with subqueries)
  - Add Spark dispatches for test helpers: edr_create_schema, edr_drop_schema, edr_schema_exists, edr_list_schemas
  - Add a spark__get_anomaly_config dispatch to handle Spark's database==schema requirement
  - Fix is_elementary_enabled to check the schema as a fallback for adapters without a database concept
  - Fix spark__escape_special_chars to use C-style backslash escaping (Spark doesn't support SQL-standard '')
  - Revert spark__get_package_database_and_schema (the default dispatch returns [None, schema] correctly)

* fix: address CodeRabbit review - escape backticks, fix MERGE aliases, add docs, slim Dockerfile

* feat: unskip exposure_schema_validity tests for Spark, tune Spark config for speed

* perf: add SparkDirectSeeder (bypass dbt seed) and tune Spark config for -n8 parallelism
  - Add SparkDirectSeeder, which executes CREATE TABLE + INSERT VALUES directly via the dbt adapter, bypassing the ~4s dbt subprocess overhead per seed
  - Add execute_sql() and a schema_name property to AdapterQueryRunner
  - DbtProject auto-selects SparkDirectSeeder when the target is 'spark'
  - Tune spark-defaults.conf: executor.cores=4, default.parallelism=4, thriftServer.async=true for better concurrent session handling
  - Restore -n8 parallelism for Spark in CI (was -n4)

* perf: revert Spark parallelism to -n4 (keep direct seeder optimization)
  The -n8 experiment may be causing resource contention on the 2-vCPU CI runner. Reverting Spark to -n4 while keeping the SparkDirectSeeder and the Spark config tuning (executor.cores=4, async=true). The direct seeder alone should provide a meaningful speedup (~3.6x faster per seed).

* fix: add type inference to SparkDirectSeeder (BIGINT, DOUBLE, BOOLEAN, STRING)

* refactor: extract a BaseDirectSeeder base class for the Spark and ClickHouse seeders
  - Shared logic: type inference, CSV writing for ref(), batched inserts, cleanup
  - SparkDirectSeeder and ClickHouseDirectSeeder are thin subclasses
  - SparkDirectSeeder now writes a CSV for ref() resolution (was missing)
  - _create_seeder() handles both spark and clickhouse targets

* fix: revert Spark config tuning (perf regression), add Docker healthcheck, fix MERGE comment
  - Revert executor.cores back to 1 and default.parallelism back to 2, and remove thriftServer.async; these were introduced in 6a8ec2a and correlated with a performance regression (tests taking longer than the original ~36 min)
  - Add a Docker healthcheck to the spark-thrift container (nc -z localhost 10000)
  - Use the docker inspect healthcheck in CI instead of raw nc port polling
  - Add an explicit container_name to spark-thrift for reliable docker inspect
  - Fix the MERGE comment in delete_and_insert.sql to accurately describe why we use MERGE unconditionally on Spark

* fix: restore the non-Delta fallback in spark__get_delete_and_insert_queries
  Restore the elif branch for non-Delta Spark tables (used by dbt-databricks). The three branches are now:
  1. relation.metadata and is_delta → MERGE (dbt-databricks, Delta)
  2. not relation.metadata → MERGE (dbt-spark thrift, assumes Delta via config)
  3. else → DELETE with subquery (dbt-databricks, non-Delta)

* refactor: unify the MERGE branches in spark__get_delete_and_insert_queries
  Combine the Delta and no-metadata MERGE conditions into a single branch: (relation.metadata and relation.is_delta) or not relation.metadata

* perf: revert SparkDirectSeeder to DbtDataSeeder to isolate the performance regression

* perf: fix SparkDirectSeeder to use PyHive directly, avoiding global dbt state corruption
  Root cause: AdapterQueryRunner._create_adapter() calls set_from_args() and reset_adapters(), which corrupt global dbt state (GLOBAL_FLAGS, adapter registry). Since the tests use the in-process APIDbtRunner (dbtRunner().invoke()), this corrupted state causes subsequent dbt test calls to run with the wrong flags, leading to 3-10x regressions on multi-call tests (e.g. volume_anomaly).
  Fix: SparkDirectSeeder now uses PyHive/Thrift directly instead of going through AdapterQueryRunner, completely avoiding the global state issue. Connection details are read from profiles.yml.

* revert: remove SparkDirectSeeder, use DbtDataSeeder for Spark
  The SparkDirectSeeder (both the AdapterQueryRunner and PyHive variants) caused a ~60% regression on Spark CI (58 min vs the 36 min baseline). The regression was concentrated in the volume_anomaly tests (3-10x slower), which call dbt_project.test() multiple times per pytest function. Two approaches were tested:
  1. SparkDirectSeeder via AdapterQueryRunner: 57:15 (hypothesis: global dbt state corruption via set_from_args/reset_adapters)
  2. SparkDirectSeeder via PyHive directly: 58:03 (bypasses dbt entirely)
  Both showed the same regression, disproving the global-state-corruption hypothesis. The root cause of the interaction between direct SQL seeding and subsequent dbt test calls remains undetermined. Reverting Spark to DbtDataSeeder restores the 36:47 baseline. ClickHouseDirectSeeder (via BaseDirectSeeder) is kept, as it works correctly.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Itamar Hartstein <haritamar@gmail.com>
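As a side note on the escaping fixes described in the message above, here is a minimal Python sketch of the behavior attributed to spark__escape_special_chars, assuming C-style backslash escaping plus newline-to-space replacement; the function name, signature, and exact replacement set are illustrative, not the macro's actual code:

    def spark_escape_special_chars(value: str) -> str:
        # Hypothetical re-implementation of the behavior the commit message
        # describes. Spark's SQL parser does not honor SQL-standard ''
        # quote doubling, so quotes get C-style backslash escapes; literal
        # newlines corrupt rows in INSERT ... VALUES, so they become spaces.
        value = value.replace("\\", "\\\\")  # escape backslashes first
        value = value.replace("'", "\\'")    # C-style single-quote escaping
        value = value.replace("\n", " ")     # newlines -> spaces, not \n
        return value

    print(spark_escape_special_chars("line1\nit's fine"))  # line1 it\'s fine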
1 parent 8731abd commit 06a56ee

23 files changed

Lines changed: 407 additions & 107 deletions

.github/workflows/test-all-warehouses.yml

Lines changed: 3 additions & 1 deletion
@@ -48,7 +48,7 @@ jobs:
       dbt-version:
         ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) ||
         fromJSON('["latest_official", "latest_pre"]') }}
-      warehouse-type: [postgres, clickhouse, trino, dremio, duckdb]
+      warehouse-type: [postgres, clickhouse, trino, dremio, spark, duckdb]
       exclude:
         # latest_pre is only tested on postgres
         - dbt-version: latest_pre
@@ -57,6 +57,8 @@
           warehouse-type: trino
         - dbt-version: latest_pre
           warehouse-type: dremio
+        - dbt-version: latest_pre
+          warehouse-type: spark
         - dbt-version: latest_pre
           warehouse-type: duckdb
     uses: ./.github/workflows/test-warehouse.yml

.github/workflows/test-warehouse.yml

Lines changed: 12 additions & 2 deletions
@@ -100,6 +100,16 @@ jobs:
           timeout 180 bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} dremio 2>/dev/null)" = "healthy" ]; do sleep 5; done'
           echo "Dremio is healthy."
 
+      - name: Start Spark
+        if: inputs.warehouse-type == 'spark'
+        working-directory: ${{ env.TESTS_DIR }}
+        run: |
+          docker compose -f docker-compose-spark.yml build
+          docker compose -f docker-compose-spark.yml up -d
+          echo "Waiting for Spark Thrift Server to become healthy..."
+          timeout 180 bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} spark-thrift 2>/dev/null)" = "healthy" ]; do sleep 5; done'
+          echo "Spark Thrift Server is healthy."
+
       - name: Setup Python
         uses: actions/setup-python@v6
         with:
@@ -108,7 +118,7 @@
 
       - name: Install Spark requirements
         if: inputs.warehouse-type == 'spark'
-        run: sudo apt-get install python-dev libsasl2-dev gcc
+        run: sudo apt-get update && sudo apt-get install -y python3-dev libsasl2-dev gcc
 
       - name: Install compatible databricks connector (not limited in older dbt-databricks versions)
         if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0'
@@ -168,7 +178,7 @@
 
       - name: Test
         working-directory: "${{ env.TESTS_DIR }}/tests"
-        run: py.test -n8 -vvv --target "${{ inputs.warehouse-type }}" --junit-xml=test-results.xml --html=detailed_report_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}.html --self-contained-html --clear-on-end ${{ (inputs.dbt-version == 'fusion' && '--runner-method fusion') || '' }}
+        run: py.test -n${{ (inputs.warehouse-type == 'spark' && '4') || '8' }} -vvv --target "${{ inputs.warehouse-type }}" --junit-xml=test-results.xml --html=detailed_report_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}.html --self-contained-html --clear-on-end ${{ (inputs.dbt-version == 'fusion' && '--runner-method fusion') || '' }}
 
       - name: Upload test results
         if: always()
integration_tests/dbt_project/dbt_project.yml

Lines changed: 2 additions & 0 deletions
@@ -23,7 +23,9 @@ models:
   elementary_tests:
     tmp:
       +materialized: table
+      +file_format: "{{ 'delta' if target.type == 'spark' else none }}"
 
   elementary:
     +schema: elementary
     +enabled: "{{ var('elementary_enabled', True) }}"
+    +file_format: "{{ 'delta' if target.type == 'spark' else none }}"

integration_tests/dbt_project/macros/ci_schemas_cleanup/test_drop_stale_ci_schemas.sql

Lines changed: 5 additions & 0 deletions
@@ -73,3 +73,8 @@
   {% do run_query("CREATE DATABASE IF NOT EXISTS `" ~ schema_name ~ "`") %}
   {% do adapter.commit() %}
 {% endmacro %}
+
+{% macro spark__edr_create_schema(database, schema_name) %}
+  {% set safe_schema = schema_name | replace("`", "``") %}
+  {% do run_query("CREATE DATABASE IF NOT EXISTS `" ~ safe_schema ~ "`") %}
+{% endmacro %}
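The replace("`", "``") above is Spark's identifier-escaping rule: inside a backtick-quoted identifier, a literal backtick is written as two backticks, so an odd or hostile schema name cannot break out of the quoting. A tiny Python illustration (quote_spark_identifier is a hypothetical helper, not from the repo):

    def quote_spark_identifier(name: str) -> str:
        # Double embedded backticks, then wrap the whole name in backticks,
        # mirroring the escaping in spark__edr_create_schema above.
        return "`" + name.replace("`", "``") + "`"

    assert quote_spark_identifier("my`schema") == "`my``schema`"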

integration_tests/dbt_project/macros/clear_env.sql

Lines changed: 5 additions & 0 deletions
@@ -19,6 +19,11 @@
   {% do adapter.commit() %}
 {% endmacro %}
 
+{% macro spark__edr_drop_schema(database_name, schema_name) %}
+  {% set safe_schema = schema_name | replace("`", "``") %}
+  {% do run_query("DROP DATABASE IF EXISTS `" ~ safe_schema ~ "` CASCADE") %}
+{% endmacro %}
+
 {% macro duckdb__edr_drop_schema(database_name, schema_name) %}
   {% do run_query("DROP SCHEMA IF EXISTS " ~ schema_name ~ " CASCADE") %}
   {% do adapter.commit() %}

integration_tests/dbt_project/macros/get_anomaly_config.sql

Lines changed: 24 additions & 1 deletion
@@ -25,6 +25,29 @@
   {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("db", "schema", "mock_model"), **config)[0]) %}
 {% endmacro %}
 
+{% macro spark__get_anomaly_config(model_config, config) %}
+  {% set mock_model = {
+    "alias": "mock_model",
+    "config": {
+      "elementary": model_config
+    }
+  } %}
+  {# trick elementary into thinking this is the running model #}
+  {% do context.update({
+    "model": {
+      "depends_on": {
+        "nodes": ["id"]
+      }
+    },
+    "graph": {
+      "nodes": {
+        "id": mock_model
+      }
+    }
+  }) %}
+  {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("schema", "schema", "mock_model"), **config)[0]) %}
+{% endmacro %}
+
 {% macro clickhouse__get_anomaly_config(model_config, config) %}
   {% set mock_model = {
     "alias": "mock_model",
@@ -46,4 +69,4 @@
     }
   }) %}
   {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("schema", "schema", "mock_model"), **config)[0]) %}
-{% endmacro %}
+{% endmacro %}
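The substantive difference from the default dispatch is the mock relation: Spark has no separate catalog, so the relation is built with database == schema (api.Relation.create("schema", "schema", ...)). The commit message describes the same rule applied to test source configs via get_database_and_schema_properties(); a hypothetical Python sketch of that logic, with the signature and call site assumed from the message:

    def get_database_and_schema_properties(target_type, database, schema):
        # Per the commit message: Spark has no usable database/catalog,
        # so return None and let callers omit the field entirely.
        if target_type == "spark":
            return None, schema
        return database, schema

    # Build a YAML-bound source config, omitting `database:` when None.
    props = {"schema": "test_schema"}
    database, schema = get_database_and_schema_properties("spark", "db", "test_schema")
    if database is not None:
        props["database"] = database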

integration_tests/dbt_project/macros/schema_utils/list_schemas.sql

Lines changed: 9 additions & 0 deletions
@@ -38,3 +38,12 @@
   {% endfor %}
   {% do return(schemas) %}
 {% endmacro %}
+
+{% macro spark__edr_list_schemas(database) %}
+  {% set results = run_query('SHOW DATABASES') %}
+  {% set schemas = [] %}
+  {% for row in results %}
+    {% do schemas.append(row[0]) %}
+  {% endfor %}
+  {% do return(schemas) %}
+{% endmacro %}

integration_tests/dbt_project/macros/schema_utils/schema_exists.sql

Lines changed: 6 additions & 0 deletions
@@ -29,3 +29,9 @@
   {% set result = run_query("SELECT 1 FROM system.databases WHERE name = '" ~ safe_schema ~ "' LIMIT 1") %}
   {% do return(result | length > 0) %}
 {% endmacro %}
+
+{% macro spark__edr_schema_exists(database, schema_name) %}
+  {% set safe_schema = schema_name | replace("'", "''") %}
+  {% set result = run_query("SHOW DATABASES LIKE '" ~ safe_schema ~ "'") %}
+  {% do return(result | length > 0) %}
+{% endmacro %}
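spark__edr_schema_exists relies on SHOW DATABASES LIKE returning one row per matching database. A rough Python equivalent over a PyHive cursor (a sketch that mirrors the macro's quote escaping; spark_schema_exists is an illustrative name):

    def spark_schema_exists(cursor, schema_name: str) -> bool:
        # Escape single quotes, then count the matches returned by
        # SHOW DATABASES LIKE, mirroring the macro above.
        safe = schema_name.replace("'", "''")
        cursor.execute(f"SHOW DATABASES LIKE '{safe}'")
        return len(cursor.fetchall()) > 0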
docker-compose-spark.yml

Lines changed: 41 additions & 0 deletions

@@ -0,0 +1,41 @@
+version: "3.8"
+
+services:
+  spark-thrift:
+    container_name: spark-thrift
+    build:
+      context: ./docker/spark
+      dockerfile: Dockerfile
+    ports:
+      - "10000:10000"
+      - "4040:4040"
+    depends_on:
+      - spark-hive-metastore
+    command: >
+      --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
+      --name Thrift JDBC/ODBC Server
+    healthcheck:
+      test: ["CMD-SHELL", "nc -z localhost 10000"]
+      interval: 10s
+      timeout: 5s
+      retries: 20
+      start_period: 30s
+    volumes:
+      - spark-warehouse:/spark-warehouse/
+      - ./docker/spark/hive-site.xml:/usr/spark/conf/hive-site.xml
+      - ./docker/spark/spark-defaults.conf:/usr/spark/conf/spark-defaults.conf
+    environment:
+      - WAIT_FOR=spark-hive-metastore:5432
+
+  spark-hive-metastore:
+    image: postgres:15-alpine
+    volumes:
+      - hive-metastore:/var/lib/postgresql/data
+    environment:
+      - POSTGRES_USER=dbt
+      - POSTGRES_PASSWORD=dbt
+      - POSTGRES_DB=metastore
+
+volumes:
+  spark-warehouse:
+  hive-metastore:
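The healthcheck here is what the CI "Start Spark" step polls via docker inspect before running tests. For local debugging, a rough Python equivalent of that wait loop (the container name and 180s timeout mirror the compose file and the CI step):

    import subprocess
    import time

    def wait_for_healthy(container: str, timeout_s: int = 180) -> None:
        # Poll Docker's healthcheck status (driven by the compose file's
        # `nc -z localhost 10000` probe) until it reports "healthy".
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            result = subprocess.run(
                ["docker", "inspect", "-f", "{{.State.Health.Status}}", container],
                capture_output=True,
                text=True,
            )
            if result.stdout.strip() == "healthy":
                return
            time.sleep(5)
        raise TimeoutError(f"{container} not healthy after {timeout_s}s")

    wait_for_healthy("spark-thrift")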
docker/spark/Dockerfile

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+ARG DELTA_VERSION=2.2.0
+
+ENV SPARK_HOME /usr/spark
+ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends wget netcat-openbsd procps libpostgresql-jdbc-java && \
+    wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+    wget -q "https://repo1.maven.org/maven2/io/delta/delta-core_2.12/${DELTA_VERSION}/delta-core_2.12-${DELTA_VERSION}.jar" \
+      -P /usr/spark/jars/ && \
+    wget -q "https://repo1.maven.org/maven2/io/delta/delta-storage/${DELTA_VERSION}/delta-storage-${DELTA_VERSION}.jar" \
+      -P /usr/spark/jars/ && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]
