Docker Compose: running tasks of the same DAG on different servers


I need to run the following DAG:

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

# default_args is assumed to be defined earlier in the file;
# it is not shown in the original snippet.
dag = DAG('dummy_for_testing',
          default_args=default_args,
          schedule_interval=None)

t1 = BashOperator(
    task_id='print_date',
    bash_command='date',
    dag=dag)

t2 = BashOperator(
    task_id='print_host',
    bash_command='hostname',
    queue='druid_queue',
    dag=dag)

t3 = BashOperator(
    task_id='print_directory',
    bash_command='pwd',
    dag=dag)

t3.set_upstream(t2)
t2.set_upstream(t1)
where t1 and t3 should run on Server A and t2 should run on Server B (queue='druid_queue'). I am currently setting up Airflow with docker-compose; the yml files for the two servers look like this:

Server 1

version: '2.1'
services:
    redis:
        image: 'redis:3.2.7'
        ports:
            - "10.0.11.4:6999:6379"
        command: redis-server

    postgres:
        image: postgres:9.6
        container_name: postgres-airflow
        ports:
            - "10.0.11.4:5434:5432"
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow

    webserver:
        image: puckel/docker-airflow:1.10.2
        container_name: airflow
        restart: always
        depends_on:
            - postgres
            - redis
        environment:
            - LOAD_EX=n
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - user_logs_config_loc=dags/user_logs/configurations/
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        volumes:
            - /data/druid-data/airflow/dags:/usr/local/airflow/dags
            - /var/run/docker.sock:/var/run/docker.sock
        ports:
            - "10.0.11.4:8085:8080"
        command: webserver
        healthcheck:
            test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
            interval: 30s

    flower:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - redis
        environment:
            - EXECUTOR=Celery
        ports:
            - "5555:5555"
        command: flower

    scheduler:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - webserver
        volumes:
            - /data/druid-data/airflow/dags:/usr/local/airflow/dags
        environment:
            - LOAD_EX=n
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
            - user_logs_config_loc=dags/user_logs/configurations/
        command: scheduler

    worker:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - scheduler
        volumes:
            - /data/druid-data/airflow/dags:/usr/local/airflow/dags
        environment:
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
            - user_logs_config_loc=dags/user_logs/configurations/
        command: worker
Server 2

version: '2.1'
services:
    redis:
        image: 'redis:3.2.7'
        ports:
            - "10.0.11.5:6999:6379"
        command: redis-server

    postgres:
        image: postgres:9.6
        container_name: postgres-airflow
        ports:
            - "10.0.11.5:5434:5432"
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow

    webserver:
        image: puckel/docker-airflow:latest
        container_name: airflow
        restart: always
        depends_on:
            - postgres
            - redis
        environment:
            - LOAD_EX=n
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - user_logs_config_loc=dags/user_logs/configurations/
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        volumes:
            - /data/qa/druid-data/airflow/dags:/usr/local/airflow/dags
            - /var/run/docker.sock:/var/run/docker.sock
        ports:
            - "10.0.11.5:8085:8080"
        command: webserver
        healthcheck:
            test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
            interval: 30s

    flower:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - redis
        environment:
            - EXECUTOR=Celery
        ports:
            - "5555:5555"
        command: flower

    scheduler:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - webserver
        volumes:
            - ./dags:/usr/local/airflow/dags
            - /data/qa/druid-data/airflow/dags:/usr/local/airflow/dags
        environment:
            - LOAD_EX=n
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        command: scheduler

    worker:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - scheduler
        volumes:
            - ./dags:/usr/local/airflow/dags
            - /data/qa/druid-data/airflow/dags:/usr/local/airflow/dags
        environment:
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        command: worker -q druid_queue
The variables on Server 1 look like this:

broker_url = redis://redis:6379/1
result_backend = db+postgresql://airflow:airflow@postgres:5432/airflow

The variables on Server 2 look like this:

broker_url = redis://10.0.11.4:6999/1
result_backend = db+postgresql://airflow:airflow@10.0.11.4:5434/airflow

Is there something wrong with my configuration? When the DAG is triggered from Server A's webserver, it gets stuck:

Logs captured in the scheduler container of Server A:

[2019-04-12 14:42:35,184] {{jobs.py:1215}} INFO - Setting the follow tasks to queued state:
    <TaskInstance: dummy_for_testing.print_date 2019-04-12 14:42:33.552786+00:00 [scheduled]>
[2019-04-12 14:42:35,194] {{jobs.py:1299}} INFO - Setting the following 1 tasks to queued state:
    <TaskInstance: dummy_for_testing.print_date 2019-04-12 14:42:33.552786+00:00 [queued]>
[2019-04-12 14:42:35,194] {{jobs.py:1341}} INFO - Sending ('dummy_for_testing', 'print_date', datetime.datetime(2019, 4, 12, 14, 42, 33, 552786, tzinfo=<TimezoneInfo [UTC, GMT, +00:00:00, STD]>), 1) to executor with priority 3 and queue default
[2019-04-12 14:42:35,194] {{base_executor.py:56}} INFO - Adding to queue: airflow run dummy_for_testing print_date 2019-04-12T14:42:33.552786+00:00 --local -sd /usr/local/airflow/dags/dag_test.py
[2019-04-12 14:42:35,199] {{celery_executor.py:83}} INFO - [celery] queuing ('dummy_for_testing', 'print_date', datetime.datetime(2019, 4, 12, 14, 42, 33, 552786, tzinfo=<TimezoneInfo [UTC, GMT, +00:00:00, STD]>), 1) through celery, queue=default
[2019-04-12 14:42:37,152] {{jobs.py:1559}} INFO - Harvesting DAG parsing results
[2019-04-12 14:42:39,154] {{jobs.py:1559}} INFO - Harvesting DAG parsing results
[2019-04-12 14:42:40,610] {{sqlalchemy.py:79}} WARNING - DB connection invalidated. Reconnecting...
[2019-04-12 14:42:41,156] {{jobs.py:1559}} INFO - Harvesting DAG parsing results
[2019-04-12 14:42:41,179] {{jobs.py:1106}} INFO - 1 tasks up for execution:
    <TaskInstance: dummy_for_testing.print_host 2019-04-12 14:42:33.552786+00:00 [scheduled]>
[2019-04-12 14:42:41,182] {{jobs.py:1141}} INFO - Figuring out tasks to run in Pool(name=None) with 128 open slots and 1 task instances in queue
[2019-04-12 14:42:41,184] {{jobs.py:1177}} INFO - DAG dummy_for_testing has 12/16 running and queued tasks
[2019-04-12 14:42:41,184] {{jobs.py:1215}} INFO - Setting the follow tasks to queued state:
    <TaskInstance: dummy_for_testing.print_host 2019-04-12 14:42:33.552786+00:00 [scheduled]>
[2019-04-12 14:42:41,193] {{jobs.py:1299}} INFO - Setting the following 1 tasks to queued state:
    <TaskInstance: dummy_for_testing.print_host 2019-04-12 14:42:33.552786+00:00 [queued]>
[2019-04-12 14:42:41,193] {{jobs.py:1341}} INFO - Sending ('dummy_for_testing', 'print_host', datetime.datetime(2019, 4, 12, 14, 42, 33, 552786, tzinfo=<TimezoneInfo [UTC, GMT, +00:00:00, STD]>), 1) to executor with priority 2 and queue druid_queue
[2019-04-12 14:42:41,194] {{base_executor.py:56}} INFO - Adding to queue: airflow run dummy_for_testing print_host 2019-04-12T14:42:33.552786+00:00 --local -sd /usr/local/airflow/dags/dag_test.py
[2019-04-12 14:42:41,198] {{celery_executor.py:83}} INFO - [celery] queuing ('dummy_for_testing', 'print_host', datetime.datetime(2019, 4, 12, 14, 42, 33, 552786, tzinfo=<TimezoneInfo [UTC, GMT, +00:00:00, STD]>), 1) through celery, queue=druid_queue
[Server A configuration and Server B configuration: Celery broker settings]

It looks like you are running the same docker-compose stack on two different servers, the only difference being that Server B starts its worker with worker -q druid_queue. Normally you would want to run Airflow with only one scheduler, one database / result backend, and one message broker (Redis) across all servers, rather than running every service on every server.
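For example, here is a minimal sketch (not your actual file) of what Server B's compose file could be reduced to, under the assumption that Server A keeps the webserver, scheduler, Postgres and Redis from the first file above:

version: '2.1'
services:
    worker:
        image: puckel/docker-airflow:1.10.2
        restart: always
        volumes:
            - /data/qa/druid-data/airflow/dags:/usr/local/airflow/dags
        environment:
            # Must be the same Fernet key that Server A uses.
            - FERNET_KEY=<>
            - EXECUTOR=Celery
            # broker_url and result_backend must point at Server A's Redis and
            # Postgres; see the environment-override sketch after the next paragraph.
        # No depends_on: the scheduler, webserver, Postgres and Redis run only on Server A.
        command: worker -q druid_queue

The worker only needs network access to the Redis and Postgres ports that Server A's compose file publishes (6999 and 5434).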


The compose file on your first server publishes Redis at 10.0.1.4:6999, and you note that the broker url on the second server is redis://10.0.11.4:6999/1. If the networking is set up correctly, then perhaps you just need to update the broker url to redis://10.0.1.4:6999/1 (note: 11 -> 1).
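Either way, the Server B worker has to be pointed at the single broker and result backend. A hedged sketch using Airflow 1.10's AIRFLOW__<SECTION>__<KEY> environment overrides and the 10.0.11.4 addresses that Server A's compose file above actually publishes (note that the puckel image's entrypoint may rebuild these URLs from its own REDIS_* / POSTGRES_* variables, in which case set those, or put the values in airflow.cfg instead):

    worker:
        environment:
            # Single Redis broker and Postgres result backend, both hosted on Server A.
            - AIRFLOW__CELERY__BROKER_URL=redis://10.0.11.4:6999/1
            - AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow@10.0.11.4:5434/airflow
            # The worker also needs the shared metadata database.
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@10.0.11.4:5434/airflow

Whatever values you choose must match what Server A itself uses, so that both machines talk to the same metadata database and message broker.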

What do you mean by stuck? Can you get the docker logs from the container?

I have added the logs from the scheduler of Server A. Which container's logs would you like?

The cause is: WARNING - DB connection invalidated. Reconnecting... I think you have not configured the Celery result backend correctly?

The Celery result backend configuration has been added above. According to the documentation, we should have the same connection on both servers. How do I check for the error?

Server A has a host named postgres, while Server B has the actual host IP. Configure Server A with the machine's IP or domain name.

Sorry, my mistake, that was a typo. My Redis is on Server A at 10.0.11.4. I am changing Server B's yml file so that it will contain only the webserver and the worker. As you can see under the worker section of Server B's yml file above, there is a depends_on on the scheduler. How can I make sure this worker points to the scheduler on Server A so that the docker-compose command does not fail?