Parallel processing: Airflow scheduler pause issue when scheduling multiple DAGs

Tags: parallel-processing, airflow, airflow-scheduler, airflow-worker

Folks, I am using Airflow (v1.10.12) to schedule daily jobs for a project and want to run multiple DAGs (14) in parallel. I have already updated the concurrency parameters in the cfg file. However, we observe the following behaviour: the scheduler starts executing tasks and assigns them to the queue, but after a while it pauses and only restarts about 5 minutes later. Because of this, the total time taken by all the DAGs is huge, and some tasks also fail. Can someone help me understand why the scheduler stalls, and whether we should tune some parameters or move to a different version?
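For context, the values in the cfg below (parallelism, dag_concurrency, max_active_runs_per_dag, worker_concurrency) are global caps; each DAG can additionally set its own limits in the DAG definition. A minimal sketch of one daily DAG, with a hypothetical dag_id and dummy tasks that are not taken from the original post:

from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

default_args = {
    "owner": "airflow",
    "retries": 0,
    "retry_delay": timedelta(minutes=5),
}

# DAG-level caps apply on top of the global [core] settings:
# `concurrency` limits running task instances of this DAG,
# `max_active_runs` limits overlapping runs of this DAG.
with DAG(
    dag_id="daily_job_example",        # hypothetical name
    default_args=default_args,
    start_date=datetime(2020, 1, 1),
    schedule_interval="@daily",
    concurrency=10,
    max_active_runs=1,
    catchup=False,
) as dag:
    # a few independent tasks that the CeleryExecutor can run in parallel
    tasks = [
        BashOperator(task_id="task_{}".format(i), bash_command="sleep 30")
        for i in range(5)
    ]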

Below is the configuration file:

[core]
dags_folder = /home/airflow/dags
base_log_folder = /home/airflow/logs
remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
logging_level = INFO
fab_logging_level = WARN
logging_config_class =
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /home/airflow/logs/dag_processor_manager/dag_processor_manager.log
hostname_callable = socket:getfqdn
default_timezone = utc
executor = CeleryExecutor
sql_alchemy_conn = postgresql+psycopg2://devairflow:airflow@localhost:5432/pcfdb 
sql_engine_encoding = utf-8
sql_alchemy_pool_enabled = True
sql_alchemy_pool_size = 0
sql_alchemy_max_overflow = -1    
# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite. If the number of DB connections is ever exceeded,
# a lower config value will allow the system to recover faster.
sql_alchemy_pool_recycle = 1800
sql_alchemy_pool_pre_ping = True
sql_alchemy_schema =
parallelism = 50
dag_concurrency = 50
dags_are_paused_at_creation = True
max_active_runs_per_dag = 50
load_examples = False
load_default_connections = True
plugins_folder = /home/airflow/plugins
fernet_key = M4dpP6f2Hd5p3N--CxtIoUo9XaSDifA42MPLs1UR7-g=
donot_pickle = False
dagbag_import_timeout = 41460
dag_file_processor_timeout = 60
task_runner = StandardTaskRunner
default_impersonation =
security =
secure_mode = False
unit_test_mode = False
enable_xcom_pickling = True
killed_task_cleanup_time = 60
dag_run_conf_overrides_params = False
worker_precheck = False
dag_discovery_safe_mode = True
default_task_retries = 0
store_serialized_dags = False
min_serialized_dag_update_interval = 30

max_num_rendered_ti_fields_per_task = 100

# On each dagrun check against defined SLAs
check_slas = True

[secrets]
# Full class name of secrets backend to enable (will precede env vars and metastore in search path)
# Example: backend = airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend
backend =

# The backend_kwargs param is loaded into a dictionary and passed to __init__ of secrets backend class.
# See documentation for the secrets backend you are using. JSON is expected.
# Example for AWS Systems Manager ParameterStore:
# ``{"connections_prefix": "/airflow/connections", "profile_name": "default"}``
backend_kwargs =

[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
# webserver
api_client = airflow.api.client.local_client

# If you set web_server_url_prefix, do NOT forget to append it here, ex:
# ``endpoint_url = http://localhost:8080/myroot``
# So api will look like: ``http://localhost:8080/myroot/api/experimental/...``
endpoint_url = http://localhost:8080

[debug]
# Used only with DebugExecutor. If set to True DAG will fail with first
# failed task. Helpful for debugging purposes.
fail_fast = False

[api]
# How to authenticate users of the API. See
# https://airflow.apache.org/docs/stable/security.html for possible values.
# ("airflow.api.auth.backend.default" allows all requests for historic reasons)
auth_backend = airflow.api.auth.backend.deny_all

[lineage]
# what lineage backend to use
backend =

[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =

[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via ``default_args``
default_owner = airflow
default_cpus = 1
default_ram = 1024
default_disk = 1024
default_gpus = 0

[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =

[webserver]
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080

# Default timezone to display all dates in the RBAC UI, can be UTC, system, or
# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
# default value of core/default_timezone will be used
# Example: default_ui_timezone = America/New_York
default_ui_timezone = UTC

# The ip specified when starting the web server
web_server_host = 0.0.0.0

# The port on which to run the web server
web_server_port = 8080

# Paths to the SSL certificate and key for the web server. When both are
# provided SSL will be enabled. This does not change the web server port.
web_server_ssl_cert =

# Paths to the SSL certificate and key for the web server. When both are
# provided SSL will be enabled. This does not change the web server port.
web_server_ssl_key =

# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 41460

# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 41460

# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1

# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30

# If set to True, Airflow will track files in plugins_folder directory. When it detects changes,
# then reload the gunicorn.
reload_on_plugin_change = False

# Secret key used to run your flask app
# It should be as random as possible
secret_key = temporary_key

# Number of workers to run the Gunicorn web server
workers = 4

# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync

# Log files for the gunicorn webserver. '-' means log to stderr.
access_logfile = -

# Log files for the gunicorn webserver. '-' means log to stderr.
error_logfile = -

# Expose the configuration file in the web server
expose_config = True

# Expose hostname in the web server
expose_hostname = True

# Expose stacktrace in the web server
expose_stacktrace = True

# Set to true to turn on authentication:
# https://airflow.apache.org/security.html#web-authentication
authenticate = False

# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False

# Filtering mode. Choices include user (default) and ldapgroup.
# Ldap group filtering requires using the ldap backend
#
# Note that the ldap server needs the "memberOf" overlay to be set up
# in order to use the ldapgroup mode.
owner_mode = user

# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree

# "Default DAG orientation. Valid values are:"
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR

# Puts the webserver in demonstration mode; blurs the names of Operators for
# privacy.
demo_mode = False

# The amount of time (in secs) webserver will wait for initial handshake
# while fetching logs from other worker machine
log_fetch_timeout_sec = 5

# Time interval (in secs) to wait before next log fetching.
log_fetch_delay_sec = 2

# Distance away from page bottom to enable auto tailing.
log_auto_tailing_offset = 30

# Animation speed for auto tailing log display.
log_animation_speed = 1000

# By default, the webserver shows paused DAGs. Flip this to hide paused
# DAGs by default
hide_paused_dags_by_default = False

# Consistent page size across all listing views in the UI
page_size = 100

# Use FAB-based webserver with RBAC feature
rbac = False

# Define the color of navigation bar
navbar_color = #007A87

# Default dagrun to show in UI
default_dag_run_display_number = 25

# Enable werkzeug ``ProxyFix`` middleware for reverse proxy
enable_proxy_fix = False

# Number of values to trust for ``X-Forwarded-For``.
# More info: https://werkzeug.palletsprojects.com/en/0.16.x/middleware/proxy_fix/
proxy_fix_x_for = 1

# Number of values to trust for ``X-Forwarded-Proto``
proxy_fix_x_proto = 1

# Number of values to trust for ``X-Forwarded-Host``
proxy_fix_x_host = 1

# Number of values to trust for ``X-Forwarded-Port``
proxy_fix_x_port = 1

# Number of values to trust for ``X-Forwarded-Prefix``
proxy_fix_x_prefix = 1

# Set secure flag on session cookie
cookie_secure = False

# Set samesite policy on session cookie
cookie_samesite =

# Default setting for wrap toggle on DAG code and TI log views.
default_wrap = False

# Allow the UI to be rendered in a frame
x_frame_enabled = True

# Send anonymous user activity to your analytics tool
# choose from google_analytics, segment, or metarouter
# analytics_tool =

# Unique ID of your account in the analytics tool
# analytics_id =

# Update FAB permissions and sync security manager roles
# on webserver startup
update_fab_perms = True

# Minutes of non-activity before logged out from UI
# 0 means never get forcibly logged out
force_log_out_after = 0

# The UI cookie lifetime in days
session_lifetime_days = 30

[email]
email_backend = airflow.utils.email.send_email_smtp

[smtp]

# If you want airflow to send emails on retries, failure, and you want to use
# the airflow.utils.email.send_email_smtp function, you have to configure an
# smtp server here
# smtp_host = localhost

# SMTP address: 192.168.152.213
# SMTP port: 25
# SMTP user: etf@csopasset.com

smtp_host = *.*.*.*
smtp_starttls = True

smtp_ssl = False
# smtp_user = etf@csopasset.com
# smtp_password = etfGen2013
smtp_port = 25
smtp_mail_from = etf@***.com

[sentry]

# Sentry (https://docs.sentry.io) integration
sentry_dsn =

[celery]

# This section only applies if you are using the CeleryExecutor in
# ``[core]`` section above
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor

# The concurrency that will be used when starting workers with the
# ``airflow celery worker`` command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
worker_concurrency = 50

# The maximum and minimum concurrency that will be used when starting workers with the
# ``airflow celery worker`` command (always keep minimum processes, but grow
# to maximum if necessary). Note the value should be max_concurrency,min_concurrency
# Pick these numbers based on resources on worker box and the nature of the task.
# If autoscale option is available, worker_concurrency will be ignored.
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
# Example: worker_autoscale = 16,12
# worker_autoscale =

# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the workers local log files to the airflow main
# web server, who then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open
# visible from the main web server to connect into the workers.
worker_log_server_port = 8793

# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
# broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow

broker_url = amqp://guest:guest@localhost:5672//
# The Celery result_backend. When a job finishes, it needs to update the
# metadata of the job. Therefore it will post a message on a message bus,
# or insert it into a database (depending of the backend)
# This status is used by the scheduler to update the state of the task
# The use of a database is highly recommended
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
#result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
result_backend = db+postgresql+psycopg2://devairflow:airflow@localhost:5432/pcfdb

# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it ``airflow flower``. This defines the IP that Celery Flower runs on
flower_host = 0.0.0.0

# The root URL for Flower
# Example: flower_url_prefix = /flower
flower_url_prefix =

# This defines the port that Celery Flower runs on
flower_port = 5555

# Securing Flower with Basic Authentication
# Accepts user:password pairs separated by a comma
# Example: flower_basic_auth = user1:password1,user2:password2
flower_basic_auth =

# Default queue that tasks get assigned to and that worker listen on.
default_queue = default

# How many processes CeleryExecutor uses to sync task state.
# 0 means to use max(1, number of cores - 1) processes.
sync_parallelism = 0

# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG

# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =

# Celery Pool implementation.
# Choices include: prefork (default), eventlet, gevent or solo.
# See:
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
pool = prefork

# The number of seconds to wait before timing out ``send_task_to_executor`` or
# ``fetch_celery_task_state`` operations.
operation_timeout = 50

[celery_broker_transport_options]

# This section is for specifying options which can be passed to the
# underlying celery broker transport. See:
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options
# The visibility timeout defines the number of seconds to wait for the worker
# to acknowledge the task before the message is redelivered to another worker.
# Make sure to increase the visibility timeout to match the time of the longest
# ETA you're planning to use.
# visibility_timeout is only supported for Redis and SQS celery brokers.
# See:
# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options
# Example: visibility_timeout = 21600
# visibility_timeout =

[dask]

# This section only applies if you are using the DaskExecutor in
# [core] section above
# The IP address and port of the Dask cluster's scheduler.
cluster_address = 127.0.0.1:8786

# TLS/ SSL settings to access a secured Dask scheduler.
tls_ca =
tls_cert =
tls_key =

[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5

# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 5

# After how much time should the scheduler terminate in seconds
# -1 indicates to run continuously (see also num_runs)
run_duration = -1

# The number of times to try to schedule each DAG file
# -1 indicates unlimited number
num_runs = -1

# The number of seconds to wait between consecutive DAG file processing
processor_poll_interval = 1

# after how much time (seconds) a new DAGs should be picked up from the filesystem
min_file_process_interval = 0

# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
dag_dir_list_interval = 300

# How often should stats be printed to the logs. Setting to 0 will disable printing stats
print_stats_interval = 30

# If the last scheduler heartbeat happened more than scheduler_health_check_threshold
# ago (in seconds), scheduler is considered unhealthy.
# This is used by the health check in the "/health" endpoint
scheduler_health_check_threshold = 30
child_process_log_directory = /home/airflow/logs/scheduler

# Local task jobs periodically heartbeat to the DB. If the job has
# not heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 1800

# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
# will not do scheduler catchup if this is False,
# however it can be set on a per DAG basis in the
# DAG definition (catchup)
catchup_by_default = True

# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by one
# or more of the following:
# - reversion to full table scan
# - complexity of query predicate
# - excessive locking
# Additionally, you may hit the maximum allowable query length for your db.
# Set this to 0 for no limit (not advised)
max_tis_per_query = 512

# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow

# If you want to avoid send all the available metrics to StatsD,
# you can configure an allow list of prefixes to send only the metrics that
# start with the elements of the list (e.g: scheduler,executor,dagrun)
statsd_allow_list =

# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run.
max_threads = 2
authenticate = False

# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True

# Allow externally triggered DagRuns for Execution Dates in the future
# Only has effect if schedule_interval is set to None in DAG
allow_trigger_in_future = False

[ldap]
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False

[mesos]
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050

# The framework name which Airflow scheduler will register itself as on mesos
framework_name = Airflow

# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1

# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256

# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False

# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits
# until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos
# shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# Example: failover_timeout = 604800
# failover_timeout =

# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
authenticate = False

# Mesos credentials, if authentication is enabled
# Example: default_principal = admin
# default_principal =
# Example: default_secret = admin
# default_secret =

# Optional Docker Image to run on slave before running the command
# This image should be accessible from mesos slave i.e mesos slave
# should be able to pull this docker image before executing the command.
# Example: docker_image_slave = puckel/docker-airflow
# docker_image_slave =

[kerberos]
ccache = /tmp/airflow_krb5_ccache

# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab

[github_enterprise]
api_rev = v3

[admin]
# UI to hide sensitive variable fields when set to True
hide_sensitive_variable_fields = True

[elasticsearch]
# Elasticsearch host
host =

# Format of the log_id, which is used to query for a given tasks logs
log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}

# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log

# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code will prefix the https:// automatically, don't include that here.
frontend =

# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False

# Instead of the default log formatter, write the log lines as JSON
json_format = False

# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message

[elasticsearch_configs]
use_ssl = False
verify_certs = True
scheduler_health_check_threshold = 300
scheduler_heartbeat_sec = 60
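Both of the keys above belong to the [scheduler] section of airflow.cfg (the pasted file currently uses 30 and 5 for them). If they are meant as revised values, they can also be applied per environment through Airflow's AIRFLOW__{SECTION}__{KEY} environment-variable convention, e.g. AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_THRESHOLD=300 and AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC=60, which takes precedence over the file.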