Airflow DAG - tasks immediately put into 'up_for_retry' state ('start_date' is 1 day ago)


I don't know whether I am missing something about the Airflow scheduler, or whether this is an actual bug in Airflow.

Here is the situation:

  • My DAG's start date is set to
    "start_date": airflow.utils.dates.days_ago(1),
  • I upload the DAG into the folder that Airflow scans for DAGs
  • I then switch the DAG on (it is "Off" by default)
  • The tasks in the pipeline immediately go into "up_for_retry", yet you cannot actually see what was attempted before (see the inspection sketch just below this question)
  • Airflow version info:
    Version: 1.10.14
    It runs on Azure Kubernetes
  • Task instance details look like this:

Am I missing something here that would tell me whether this is a bug or expected behaviour?
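To make "you cannot see what was attempted before" concrete, here is a rough sketch of how I could inspect the metadata database for that task instance. This is my own assumption of a reasonable check on Airflow 1.10.x using TaskInstance.try_number, not something taken from the docs; the dag_id and task_id match the DAG definition further down.

from airflow.models import TaskInstance
from airflow.utils.db import provide_session

@provide_session
def show_attempts(dag_id, task_id, session=None):
    # List every recorded task instance for this task with its state and
    # attempt counter, to see whether anything ran before "up_for_retry".
    instances = session.query(TaskInstance).filter(
        TaskInstance.dag_id == dag_id,
        TaskInstance.task_id == task_id,
    )
    for ti in instances:
        print(ti.execution_date, ti.state, "try_number:", ti.try_number)

show_attempts("process_our_data_from_boomi", "Load_our_data_name_to_source")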

EDIT: below is the DAG definition, as requested.

import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.models import Variable

dag_args = {
    "owner": "our_project_team_name",
    "retries": 1,
    "email": ["ouremail_address_replaced_by_this_string"],
    "email_on_failure": True,
    "email_on_retry": True,
    "depends_on_past": False,
    "start_date": airflow.utils.dates.days_ago(1),
}
# Implement cluster reuse on Databricks, pick from light, medium, heavy cluster type based on workloads
clusters = Variable.get("our_project_team_namejob_cluster_config", deserialize_json=True)

databricks_connection = "our_company_databricks"
adl_connection = "our_company_wasb"

pipeline_name = "process_our_data_from_boomi"

dag = DAG(dag_id=pipeline_name, default_args=dag_args, schedule_interval="0 3 * * *")

notebook_dir = "/Shared/our_data_name"
lib_path_sub = ""
lib_name_dev_plus_branch = ""
atlas_library = {
    "whl": f"dbfs:/python-wheels/atlas{lib_path_sub}/atlas_library-0{lib_name_dev_plus_branch}-py3-none-any.whl"
}

create_our_data_name_source_data_from_boomi_notebook_params = {
    "existing_cluster_id": clusters["our_cluster_name"],
    "notebook_task": {
        "notebook_path": f"{notebook_dir}/create_our_data_name_source_data_from_boomi",
        "base_parameters": {"Extraction_date": "{{ ds_nodash  }}"},
    },
}

create_our_data_name_standardized_table_from_source_xml_notebook_params = {
    "existing_cluster_id": clusters["our_cluster_name"],
    "notebook_task": {
        "notebook_path": f"{notebook_dir}/create_our_data_name_standardized_table_from_source_xml",
        "base_parameters": {"Extraction_date": "{{ ds_nodash  }}"},
    },
}

create_our_data_name_enriched_table_from_standardized_notebook_params = {
    "existing_cluster_id": clusters["our_cluster_name"],
    "notebook_task": {
        "notebook_path": f"{notebook_dir}/create_our_data_name_enriched",
        "base_parameters": {"Extraction_date": "{{ ds_nodash  }}"},
    },
}

layer_1_task = DatabricksSubmitRunOperator(
    task_id="Load_our_data_name_to_source",
    databricks_conn_id=databricks_connection,
    dag=dag,
    json=create_our_data_name_source_data_from_boomi_notebook_params,
    libraries=[atlas_library],
)

layer_2_task = DatabricksSubmitRunOperator(
    task_id="Load_our_data_name_to_standardized",
    databricks_conn_id=databricks_connection,
    dag=dag,
    json=create_our_data_name_standardized_table_from_source_xml_notebook_params,
    libraries=[
        {"maven": {"coordinates": "com.databricks:spark-xml_2.11:0.5.0"}},
        {"pypi": {"package": "inflection"}},
        atlas_library,
    ],
)

layer_3_task = DatabricksSubmitRunOperator(
    task_id="Load_our_data_name_to_enriched",
    databricks_conn_id=databricks_connection,
    dag=dag,
    json=create_our_data_name_enriched_table_from_standardized_notebook_params,
    libraries=[atlas_library],
)

layer_1_task >> layer_2_task >> layer_3_task
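For debugging, I can also run a single task instance directly from Python, bypassing the scheduler, to check whether the task fails on its own or only when scheduled. This is only a sketch under my assumptions: the execution date below is hypothetical, and calling it will really submit a Databricks run.

from datetime import datetime

# Run one task instance in isolation (BaseOperator.run in Airflow 1.10.x);
# ignore_ti_state makes it run even though the instance is "up_for_retry".
layer_1_task.run(
    start_date=datetime(2021, 1, 1),  # hypothetical execution date
    end_date=datetime(2021, 1, 1),
    ignore_ti_state=True,
)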

Can you share the Airflow schedules and how they are configured?
@A Where/how do I get the Airflow schedule? Do you mean how often it runs? It runs daily at 03:00 UTC.
Can you paste the DAG configuration, i.e. the Python code?
@AnandVidvat, please see the DAG definition added above. Sorry for being a bit late in providing this information.
Could you set the retries parameter in default_args to 0 and try again?
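To be explicit about that suggestion, this is what the change would look like: only the default_args block from the DAG above, with retries set to 0, everything else unchanged.

dag_args = {
    "owner": "our_project_team_name",
    "retries": 0,  # was 1; with no retries a failing task goes straight to "failed" rather than "up_for_retry"
    "email": ["ouremail_address_replaced_by_this_string"],
    "email_on_failure": True,
    "email_on_retry": True,
    "depends_on_past": False,
    "start_date": airflow.utils.dates.days_ago(1),
}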