# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic customization via environment variables or an .env file.
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                Default: apache/airflow:|version|
# AIRFLOW_UID                  - User ID in Airflow containers.
#                                Default: 50000
#
# The variables below are mostly useful when testing or trying Airflow out standalone:
#
# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
#                                Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
#                                Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional pip requirements to install when starting all containers.
#                                Default: ''
#
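# A minimal example .env illustrating these variables (values are placeholders,
# adjust them to your setup):
#
#   AIRFLOW_UID=50000
#   AIRFLOW_IMAGE_NAME=apache/airflow:2.2.2-python3.8
#   _AIRFLOW_WWW_USER_USERNAME=airflow
#   _AIRFLOW_WWW_USER_PASSWORD=airflow
#   _PIP_ADDITIONAL_REQUIREMENTS=
#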
# Feel free to modify this file to suit your needs.
---
version: '3'
x-airflow-common:
  &airflow-common
  # In order to add custom dependencies or upgrade provider packages, use your extended image:
  # comment out the "image" line, place your Dockerfile in the directory where you placed
  # docker-compose.yaml, uncomment the "build" line below, and then run `docker-compose build`.
  #image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.2.2-python3.8}
  #build: .
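  # A minimal Dockerfile sketch for such an extended image (requirements.txt is a
  # hypothetical file listing your extra dependencies):
  #
  #   FROM apache/airflow:2.2.2-python3.8
  #   COPY requirements.txt /requirements.txt
  #   RUN pip install --no-cache-dir -r /requirements.txt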
  image: meaningfy/airflow:latest
  env_file:
    - ../../.env
  environment:
    &airflow-common-env
    AIRFLOW__CORE__PARALLELISM: 64
    AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32
    AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4
    AIRFLOW__CELERY__WORKER_CONCURRENCY: 8
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0
    #AIRFLOW__CORE__FERNET_KEY: ${FERNET_KEY}
    #AIRFLOW__WEBSERVER__SECRET_KEY: ${SECRET_KEY}
    IS_PRIME_ENV: 'true'
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    #AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    #AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    #AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
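    # An empty Fernet key means connection passwords are stored unencrypted in the
    # metadata database. You can generate a key with (requires the cryptography
    # package, as described in the Airflow docs):
    #   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"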
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__ENABLE_XCOM_PICKLING: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
    VAULT_TOKEN: ${VAULT_TOKEN}
    VAULT_ADDR: ${VAULT_ADDR}
    ENVIRONMENT: ${ENVIRONMENT}
    PYTHONPATH: /opt/airflow/
    AIRFLOW_HOME: /opt/airflow
    RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
    XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
    DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
  extra_hosts:
    - "hermes-worker:${HERMES_IP_ADDRESS}"
    - "srv-worker:${SRV_IP_ADDRESS}"
  volumes:
#    - ./config/airflow.cfg:/opt/airflow/airflow.cfg
    - ${AIRFLOW_INFRA_FOLDER}/.env:/opt/airflow/.env
    - ${AIRFLOW_INFRA_FOLDER}/dags:/opt/airflow/dags
    - ${AIRFLOW_INFRA_FOLDER}/logs:/opt/airflow/logs
    - ${AIRFLOW_INFRA_FOLDER}/plugins:/opt/airflow/plugins
    - ${AIRFLOW_INFRA_FOLDER}/ted_sws:/opt/airflow/ted_sws
    - ${AIRFLOW_INFRA_FOLDER}/tests:/opt/airflow/tests
  user: "${AIRFLOW_UID:-50000}:0"
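  # On Linux, set AIRFLOW_UID to your host user id so files created in the mounted
  # volumes are not owned by root, e.g.: echo "AIRFLOW_UID=$(id -u)" >> .env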
  command: bash -c "export PYTHONPATH='/opt/airflow/'"
  depends_on:
    &airflow-common-depends-on
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  postgres:
    image: postgres:13
    container_name: postgres-airflow-${ENVIRONMENT}
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    ports:
      - "5432:5432"
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow

  redis:
    image: redis:latest
    container_name: redis-airflow-${ENVIRONMENT}
    expose:
      # "expose" takes a container port only (no host mapping)
      - 6379
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: unless-stopped
    networks:
      - airflow

  airflow-webserver:
    <<: *airflow-common
    container_name: airflow-webserver-${ENVIRONMENT}
    command: webserver
    restart: unless-stopped
    networks:
      - airflow
      - proxy-net
    labels:
      #### Labels define the behavior and rules of the traefik proxy for this container ####
      - "traefik.enable=true"  # <== Enable traefik to proxy this container
      - "traefik.http.routers.${ENVIRONMENT}-airflow.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)"  # <== Your domain name goes here for the http rule
      - "traefik.http.routers.${ENVIRONMENT}-airflow.entrypoints=web"  # <== Defining the entrypoint for http, **ref: line 30
      - "traefik.http.routers.${ENVIRONMENT}-airflow.middlewares=redirect@file"  # <== Middleware that redirects http to https

      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)"  # <== Your domain name for the https rule
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.entrypoints=web-secured"  # <== Defining the entrypoint for https, **ref: line 31
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.tls.certresolver=mytlschallenge"  # <== Defining the cert resolver for https
      - "traefik.http.services.${ENVIRONMENT}-airflow-secured.loadbalancer.server.port=8080"

      # Basic auth protects the https router; the http router only redirects,
      # so attaching auth there would never challenge anyone.
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.middlewares=admin-auth"
      - "traefik.http.middlewares.admin-auth.basicauth.users=admin:$$apr1$$O4NQPpRP$$P5LlBzvwUi3UuuRU9KuxY."
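      # To generate such a basicauth hash yourself (doubling "$" so compose does not
      # interpolate it), one common approach from the traefik docs is (requires
      # htpasswd from apache2-utils):
      #   echo $(htpasswd -nb admin <your-password>) | sed -e s/\\$/\\$\\$/g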

    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-scheduler:
    <<: *airflow-common
    container_name: airflow-scheduler-${ENVIRONMENT}
    command: scheduler
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-worker:
    <<: *airflow-common
    container_name: airflow-worker-${ENVIRONMENT}
    command: celery worker
    hostname: ${WORKER_HOSTNAME}
    ports:
      - "8793:8793"
    healthcheck:
      test: ["CMD-SHELL", 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    environment:
      <<: *airflow-common-env
      # Required to handle warm shutdown of the celery workers properly
      # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
      DUMB_INIT_SETSID: "0"
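      # With setsid disabled, dumb-init forwards signals only to its direct child
      # instead of the whole process group, so Celery can finish in-flight tasks
      # on SIGTERM (warm shutdown) rather than being killed immediately.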
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-triggerer:
    <<: *airflow-common
    container_name: airflow-triggerer-${ENVIRONMENT}
    command: triggerer
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    # yamllint disable rule:line-length
    command:
      - -c
      - |
        function ver() {
          printf "%04d%04d%04d%04d" $${1//./ }
        }
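        # ver zero-pads each dot-separated component to 4 digits, so e.g.
        # "2.2.2" becomes "0002000200020000" and versions can be compared
        # as plain integers below.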
        airflow_version=$$(gosu airflow airflow version)
        airflow_version_comparable=$$(ver $${airflow_version})
        min_airflow_version=2.2.0
        min_airflow_version_comparable=$$(ver $${min_airflow_version})
        if (( airflow_version_comparable < min_airflow_version_comparable )); then
          echo
          echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m"
          echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!"
          echo
          exit 1
        fi
        if [[ -z "${AIRFLOW_UID}" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
          echo "If you are on Linux, you SHOULD follow the instructions below to set "
          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
          echo "For other operating systems you can get rid of the warning with manually created .env file:"
          echo "    See: https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#setting-the-right-airflow-user"
          echo
        fi
        one_meg=1048576
        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
        disk_available=$$(df / | tail -1 | awk '{print $$4}')
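        # Units: mem_available is in MiB (pages * page size / 1 MiB), and
        # disk_available is in 1-KiB blocks (df default), so the
        # "one_meg * 10" threshold below corresponds to roughly 10 GiB free.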
        warning_resources="false"
        if (( mem_available < 4000 )) ; then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
          echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
          echo
          warning_resources="true"
        fi
        if (( cpus_available < 2 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
          echo "At least 2 CPUs recommended. You have $${cpus_available}"
          echo
          warning_resources="true"
        fi
        if (( disk_available < one_meg * 10 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
          echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
          echo
          warning_resources="true"
        fi
        if [[ $${warning_resources} == "true" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
          echo "Please follow the instructions to increase amount of resources available:"
          echo "   https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#before-you-begin"
          echo
        fi
        exec /entrypoint airflow version
    # yamllint enable rule:line-length
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
    user: "0:0"
    volumes:
      - .:/sources
    networks:
      - airflow

  airflow-cli:
    <<: *airflow-common
    profiles:
      - debug
    environment:
      <<: *airflow-common-env
      CONNECTION_CHECK_MAX_COUNT: "0"
    # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
    command:
      - bash
      - -c
      - airflow
    networks:
      - airflow

  flower:
    <<: *airflow-common
    container_name: airflow-flower-${ENVIRONMENT}
    command: celery flower
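    # Note: Flower's web UI listens on container port 5555 by default; publish or
    # proxy that port if you need to reach it from outside these networks.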
    restart: unless-stopped
    networks:
      - airflow
      - proxy-net
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

volumes:
  postgres-db-volume:
    name: airflow-${ENVIRONMENT}

networks:
  airflow:
    internal: true
    name: airflow-${ENVIRONMENT}
  common-ext:
    external:
      name: common-ext-${ENVIRONMENT}
  proxy-net:
    external:
      name: proxy-net
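
# The external networks above must already exist before `docker-compose up`,
# e.g. (substituting your ENVIRONMENT value):
#   docker network create proxy-net
#   docker network create common-ext-<ENVIRONMENT>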