
Commit 56524dc

Create docker-compose.yaml
1 parent 7721a71 commit 56524dc

1 file changed

Lines changed: 350 additions & 0 deletions
@@ -0,0 +1,350 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic customization via environment variables or an .env file.
# The following variables are supported:
#
#   AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                  Default: apache/airflow:|version|
#   AIRFLOW_UID                  - User ID in Airflow containers.
#                                  Default: 50000
#
# The following variables are mostly useful when testing or running Airflow standalone in try-out mode:
#
#   _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
#                                  Default: airflow
#   _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
#                                  Default: airflow
#   _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
#                                  Default: ''
#
# Feel free to modify this file to suit your needs.
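#
# As an illustration (the values below are assumptions, not taken from this repository),
# a minimal .env file in the directory from which docker-compose is run could look like:
#
#   AIRFLOW_UID=50000
#   _AIRFLOW_WWW_USER_USERNAME=airflow
#   _AIRFLOW_WWW_USER_PASSWORD=airflow
#   _PIP_ADDITIONAL_REQUIREMENTS=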
---
# Note: `profiles` and `condition: service_completed_successfully` below require
# docker-compose 1.29.1+ (or Compose v2), as in the official Airflow quick-start.
version: '3'
x-airflow-common:
  &airflow-common
  # In order to add custom dependencies or upgrade provider packages you can use your extended image.
  # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
  # and uncomment the "build" line below, then run `docker-compose build` to build the images.
  #image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.2.2-python3.8}
  #build: .
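  # A minimal Dockerfile for such an extended image might look like this (an
  # illustrative sketch; the base tag and requirements file name are assumptions):
  #
  #   FROM apache/airflow:2.2.2-python3.8
  #   COPY requirements.txt /requirements.txt
  #   RUN pip install --no-cache-dir -r /requirements.txt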
  image: meaningfy/airflow:latest
  env_file:
    - ../../.env
  environment:
    &airflow-common-env
    AIRFLOW__CORE__PARALLELISM: 64
    AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32
    AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4
    AIRFLOW__CELERY__WORKER_CONCURRENCY: 8
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0
    #AIRFLOW__CORE__FERNET_KEY: ${FERNET_KEY}
    #AIRFLOW__WEBSERVER__SECRET_KEY: ${SECRET_KEY}
    IS_PRIME_ENV: 'true'
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    #AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    #AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    #AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__ENABLE_XCOM_PICKLING: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
    VAULT_TOKEN: ${VAULT_TOKEN}
    VAULT_ADDR: ${VAULT_ADDR}
    ENVIRONMENT: ${ENVIRONMENT}
    PYTHONPATH: /opt/airflow/
    AIRFLOW_HOME: /opt/airflow
    RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
    XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
    DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
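    # Airflow maps environment variables of the form AIRFLOW__{SECTION}__{KEY} onto the
    # corresponding airflow.cfg option; e.g. AIRFLOW__CORE__PARALLELISM above overrides
    # "parallelism" in the [core] section.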
  extra_hosts:
    - "hermes-worker:${HERMES_IP_ADDRESS}"
    - "srv-worker:${SRV_IP_ADDRESS}"
  volumes:
    # - ./config/airflow.cfg:/opt/airflow/airflow.cfg
    - ${AIRFLOW_INFRA_FOLDER}/.env:/opt/airflow/.env
    - ${AIRFLOW_INFRA_FOLDER}/dags:/opt/airflow/dags
    - ${AIRFLOW_INFRA_FOLDER}/logs:/opt/airflow/logs
    - ${AIRFLOW_INFRA_FOLDER}/plugins:/opt/airflow/plugins
    - ${AIRFLOW_INFRA_FOLDER}/ted_sws:/opt/airflow/ted_sws
    - ${AIRFLOW_INFRA_FOLDER}/tests:/opt/airflow/tests
  user: "${AIRFLOW_UID:-50000}:0"
  # Every service below overrides `command`, so this default is effectively a
  # placeholder; PYTHONPATH is already set via the environment block above.
  command: bash -c "export PYTHONPATH='/opt/airflow/'"
  depends_on:
    &airflow-common-depends-on
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

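# "x-airflow-common" above is a compose extension field: the &airflow-common anchor is
# merged into each service below via "<<: *airflow-common", so the shared image,
# environment, volumes and dependencies are defined in a single place.
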
services:
  postgres:
    image: postgres:13
    container_name: postgres-airflow-${ENVIRONMENT}
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    ports:
      - "5432:5432"
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow

  redis:
    image: redis:latest
    container_name: redis-airflow-${ENVIRONMENT}
    # `expose` takes a bare container port (no host mapping); the port is reachable
    # only from containers on the same network.
    expose:
      - 6379
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: unless-stopped
    networks:
      - airflow

  airflow-webserver:
    <<: *airflow-common
    container_name: airflow-webserver-${ENVIRONMENT}
    command: webserver
    restart: unless-stopped
    networks:
      - airflow
      - proxy-net
    labels:
      #### Labels define the behavior and rules of the traefik proxy for this container ####
      - "traefik.enable=true" # <== Enable traefik to proxy this container
      - "traefik.http.routers.${ENVIRONMENT}-airflow.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)" # <== Your domain name goes here for the http rule
      - "traefik.http.routers.${ENVIRONMENT}-airflow.entrypoints=web" # <== Defining the entrypoint for http

      # Both middlewares are chained on the http router in a single label: the router
      # name must match the one declared above, and a router's "middlewares" label may
      # only appear once (a second declaration would silently override the first).
      - "traefik.http.routers.${ENVIRONMENT}-airflow.middlewares=redirect@file,admin-auth" # <== redirect to https, then basic auth
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)" # <== Your domain name for the https rule
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.entrypoints=web-secured" # <== Defining the entrypoint for https
      - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.tls.certresolver=mytlschallenge" # <== Defining the certresolver for https
      - "traefik.http.services.${ENVIRONMENT}-airflow-secured.loadbalancer.server.port=8080"

      - "traefik.http.middlewares.admin-auth.basicauth.users=admin:$$apr1$$O4NQPpRP$$P5LlBzvwUi3UuuRU9KuxY."
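
      # The apr1 hash above can be (re)generated with htpasswd; every "$" must be
      # doubled to "$$" so docker-compose does not treat it as variable interpolation.
      # An illustrative one-liner (requires apache2-utils):
      #
      #   echo $(htpasswd -nb admin <password>) | sed -e 's/\$/\$\$/g'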

    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-scheduler:
    <<: *airflow-common
    container_name: airflow-scheduler-${ENVIRONMENT}
    command: scheduler
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

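  # In the healthchecks above and below, "$$" is docker-compose escaping for a literal
  # "$": it stops compose from interpolating $HOSTNAME at parse time so the shell
  # inside the container expands it instead.
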
  airflow-worker:
    <<: *airflow-common
    container_name: airflow-worker-${ENVIRONMENT}
    command: celery worker
    hostname: ${WORKER_HOSTNAME}
    ports:
      - "8793:8793"
    healthcheck:
      test: ["CMD-SHELL", 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    environment:
      <<: *airflow-common-env
      # Required to handle warm shutdown of the celery workers properly
      # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
      DUMB_INIT_SETSID: "0"
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-triggerer:
    <<: *airflow-common
    container_name: airflow-triggerer-${ENVIRONMENT}
    command: triggerer
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    networks:
      - airflow
      - common-ext
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    # yamllint disable rule:line-length
    command:
      - -c
      - |
        # ver() maps a dotted version such as "2.2.2" onto a zero-padded string
        # ("0002000200020000") so that versions can be compared numerically.
        function ver() {
          printf "%04d%04d%04d%04d" $${1//./ }
        }
        airflow_version=$$(gosu airflow airflow version)
        airflow_version_comparable=$$(ver $${airflow_version})
        min_airflow_version=2.2.0
        min_airflow_version_comparable=$$(ver $${min_airflow_version})
        if (( airflow_version_comparable < min_airflow_version_comparable )); then
          echo
          echo -e "\033[1;31mERROR!!!: Airflow version $${airflow_version} is too old!\e[0m"
          echo "The minimum supported Airflow version is $${min_airflow_version}. Only use this or higher!"
          echo
          exit 1
        fi
        if [[ -z "${AIRFLOW_UID}" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
          echo "If you are on Linux, you SHOULD follow the instructions below to set the"
          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
          echo "For other operating systems you can get rid of the warning with a manually created .env file:"
          echo "    See: https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#setting-the-right-airflow-user"
          echo
        fi
        one_meg=1048576
        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
        disk_available=$$(df / | tail -1 | awk '{print $$4}')
        warning_resources="false"
        if (( mem_available < 4000 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
          echo "At least 4GB of memory is required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
          echo
          warning_resources="true"
        fi
        if (( cpus_available < 2 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough CPUs available for Docker.\e[0m"
          echo "At least 2 CPUs are recommended. You have $${cpus_available}"
          echo
          warning_resources="true"
        fi
        if (( disk_available < one_meg * 10 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough disk space available for Docker.\e[0m"
          echo "At least 10 GB is recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
          echo
          warning_resources="true"
        fi
        if [[ $${warning_resources} == "true" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: You do not have enough resources to run Airflow (see above)!\e[0m"
          echo "Please follow the instructions to increase the amount of resources available:"
          echo "   https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#before-you-begin"
          echo
        fi
        exec /entrypoint airflow version
    # yamllint enable rule:line-length
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
    user: "0:0"
    volumes:
      - .:/sources
    networks:
      - airflow

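  # Typical bring-up runs the one-shot init service first, as in the official
  # Airflow quick-start (docker-compose v1 syntax shown):
  #
  #   docker-compose up airflow-init
  #   docker-compose up -d
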
  airflow-cli:
    <<: *airflow-common
    profiles:
      - debug
    environment:
      <<: *airflow-common-env
      CONNECTION_CHECK_MAX_COUNT: "0"
    # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
    command:
      - bash
      - -c
      - airflow
    networks:
      - airflow

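  # The airflow-cli service is gated behind the "debug" profile, so it only starts
  # when that profile is enabled, e.g. (illustrative; exact flag handling may vary
  # between docker-compose versions):
  #
  #   docker-compose --profile debug run --rm airflow-cli airflow info
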
  flower:
    <<: *airflow-common
    container_name: airflow-flower-${ENVIRONMENT}
    command: celery flower
    restart: unless-stopped
    networks:
      - airflow
      - proxy-net
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

volumes:
  postgres-db-volume:
    name: airflow-${ENVIRONMENT}

# All networks are declared under a single top-level "networks:" key; declaring
# the key twice, as the original did, is an invalid duplicate YAML mapping key.
networks:
  airflow:
    internal: true
    name: airflow-${ENVIRONMENT}
  common-ext:
    external:
      name: common-ext-${ENVIRONMENT}
  proxy-net:
    external:
      name: proxy-net
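
# A quick offline sanity check of this file (variable interpolation, anchors,
# duplicate keys) is possible without starting any containers:
#
#   docker-compose config --quiet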
