Airflow 气流dag-任务立即放入';上一次重试';状态(';开始日期是1天前)
我不知道我是否缺乏气流调度器知识,或者这是否是气流的潜在缺陷 情况是这样的:Airflow 气流dag-任务立即放入';上一次重试';状态(';开始日期是1天前),airflow,airflow-scheduler,Airflow,Airflow Scheduler,我不知道我是否缺乏气流调度器知识,或者这是否是气流的潜在缺陷 情况是这样的: 我的dag的开始日期设置为“开始日期”:aiffort.utils.dates.days\u ago(1), 我将dag上传到文件夹中,气流在其中扫描dag 然后打开dag(默认情况下为“关闭”) 管道中的任务立即进入“up\u for\u retry”,而您实际上看不到以前尝试过的内容 气流版本信息:版本:1.10.14。它在蔚蓝的kubenetes上运行 任务实例详细信息如下所示: 我是否遗漏了一些东西来判断它
- 我的dag的开始日期设置为
“开始日期”:aiffort.utils.dates.days\u ago(1),
- 我将dag上传到文件夹中,气流在其中扫描dag
- 然后打开dag(默认情况下为“关闭”)
- 管道中的任务立即进入“up\u for\u retry”,而您实际上看不到以前尝试过的内容
- 气流版本信息:
。它在蔚蓝的kubenetes上运行版本:1.10.14
- 任务实例详细信息如下所示:
添加
,下面是所要求的DAG定义
import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.models import Variable
dag_args = {
"owner": "our_project_team_name",
"retries": 1,
"email": ["ouremail_address_replaced_by_this_string"],
"email_on_failure": True,
"email_on_retry": True,
"depends_on_past": False,
"start_date": airflow.utils.dates.days_ago(1),
}
# Implement cluster reuse on Databricks, pick from light, medium, heavy cluster type based on workloads
clusters = Variable.get("our_project_team_namejob_cluster_config", deserialize_json=True)
databricks_connection = "our_company_databricks"
adl_connection = "our_company_wasb"
pipeline_name = "process_our_data_from_boomi"
dag = DAG(dag_id=pipeline_name, default_args=dag_args, schedule_interval="0 3 * * *")
notebook_dir = "/Shared/our_data_name"
lib_path_sub = ""
lib_name_dev_plus_branch = ""
atlas_library = {
"whl": f"dbfs:/python-wheels/atlas{lib_path_sub}/atlas_library-0{lib_name_dev_plus_branch}-py3-none-any.whl"
}
create_our_data_name_source_data_from_boomi_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_source_data_from_boomi",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
create_our_data_name_standardized_table_from_source_xml_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_standardized_table_from_source_xml",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
create_our_data_name_enriched_table_from_standardized_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_enriched",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
layer_1_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_source",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_source_data_from_boomi_notebook_params,
libraries=[atlas_library],
)
layer_2_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_standardized",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_standardized_table_from_source_xml_notebook_params,
libraries=[
{"maven": {"coordinates": "com.databricks:spark-xml_2.11:0.5.0"}},
{"pypi": {"package": "inflection"}},
atlas_library,
],
)
layer_3_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_enriched",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_enriched_table_from_standardized_notebook_params,
libraries=[atlas_library],
)
layer_1_task >> layer_2_task >> layer_3_task
您可以分享气流计划以及如何配置它们吗?@a我在哪里/如何获得气流计划?你是说它多久运行一次?每日的3:00UTC是否可以粘贴DAG配置-python代码?@AnandVidvat,请参阅添加的DAG定义。很抱歉,我提供此信息有点晚。您能否将
default_args
中的retries
参数设置为0并重试?
import airflow
from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.models import Variable
dag_args = {
"owner": "our_project_team_name",
"retries": 1,
"email": ["ouremail_address_replaced_by_this_string"],
"email_on_failure": True,
"email_on_retry": True,
"depends_on_past": False,
"start_date": airflow.utils.dates.days_ago(1),
}
# Implement cluster reuse on Databricks, pick from light, medium, heavy cluster type based on workloads
clusters = Variable.get("our_project_team_namejob_cluster_config", deserialize_json=True)
databricks_connection = "our_company_databricks"
adl_connection = "our_company_wasb"
pipeline_name = "process_our_data_from_boomi"
dag = DAG(dag_id=pipeline_name, default_args=dag_args, schedule_interval="0 3 * * *")
notebook_dir = "/Shared/our_data_name"
lib_path_sub = ""
lib_name_dev_plus_branch = ""
atlas_library = {
"whl": f"dbfs:/python-wheels/atlas{lib_path_sub}/atlas_library-0{lib_name_dev_plus_branch}-py3-none-any.whl"
}
create_our_data_name_source_data_from_boomi_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_source_data_from_boomi",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
create_our_data_name_standardized_table_from_source_xml_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_standardized_table_from_source_xml",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
create_our_data_name_enriched_table_from_standardized_notebook_params = {
"existing_cluster_id": clusters["our_cluster_name"],
"notebook_task": {
"notebook_path": f"{notebook_dir}/create_our_data_name_enriched",
"base_parameters": {"Extraction_date": "{{ ds_nodash }}"},
},
}
layer_1_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_source",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_source_data_from_boomi_notebook_params,
libraries=[atlas_library],
)
layer_2_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_standardized",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_standardized_table_from_source_xml_notebook_params,
libraries=[
{"maven": {"coordinates": "com.databricks:spark-xml_2.11:0.5.0"}},
{"pypi": {"package": "inflection"}},
atlas_library,
],
)
layer_3_task = DatabricksSubmitRunOperator(
task_id="Load_our_data_name_to_enriched",
databricks_conn_id=databricks_connection,
dag=dag,
json=create_our_data_name_enriched_table_from_standardized_notebook_params,
libraries=[atlas_library],
)
layer_1_task >> layer_2_task >> layer_3_task