Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/silverlight/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Airflow 表示任务对非直接父任务的依赖关系_Airflow - Fatal编程技术网

Airflow 表示任务对非直接父任务的依赖关系

Airflow 表示任务对非直接父任务的依赖关系,airflow,Airflow,我有一个非常基本的气流工作流程和一个我似乎无法克服的小障碍。所以我的目标是有两组任务。首先,第一组应该运行,然后是下一组。问题是,在组2中,我需要为组1中的任务创建依赖项。如果组1中的某个任务失败,那么我们可以从组2中跳过它的依赖项 因此,这是一个我可以轻松实现的布局: 但我真正需要的是这样的东西(粉色表示跳过): 所以我正在寻找一种动态设计这种依赖关系的方法。或者我想我实际上需要的是动态地说我想跳过一个任务。非常感谢您的帮助。我在考虑使用XCOM,但我不知道如何在运行时跳过任务。一个选项是

我有一个非常基本的 Airflow 工作流,但遇到了一个似乎无法克服的小障碍。我的目标是有两组任务:先运行第一组,再运行第二组。问题在于,组2中的任务需要依赖组1中的某些任务;如果组1中的某个任务失败,那么组2中依赖它的任务就可以跳过。

因此,这是一个我可以轻松实现的布局:

但我真正需要的是这样的东西(粉色表示跳过):


所以我正在寻找一种动态设计这种依赖关系的方法。或者说,我真正需要的是在运行时动态地跳过某个任务。非常感谢您的帮助。我考虑过使用 XCom,但不知道如何在运行时跳过任务。一个选项是使用 `BranchPythonOperator`(原文写作 PythonBranchOperator),但那样就需要把它附加到每个任务上,看起来有点复杂。

因此,我提出了这个 mixin,它允许您在任务所依赖的任何任务未成功时,将该任务的状态设置为跳过:

from airflow.exceptions import AirflowSkipException
from airflow.models import TaskInstance
from airflow.utils.db import provide_session
from airflow.utils.state import State


class SkippableOperatorMixin:
    """
    Mixin that lets an Airflow operator skip itself when any of the tasks
    it depends on did not succeed.

    List it before the concrete operator in the base classes so that its
    ``__init__`` and ``pre_execute`` run first, e.g.
    ``class MySkippableOperator(SkippableOperatorMixin, MyOperator)``.
    """
    # Class-level fallback only; every instance gets its own list in __init__.
    depends_on_tasks = []

    def __init__(self, depends_on_tasks=None, **kwargs):
        """
        :param depends_on_tasks: ids of the tasks that must be in the SUCCESS
            state for this task to run. An id may be prefixed with a dag id
            (``'MainDag.SubDag1.Task1'``). Defaults to no prerequisites.
        :param kwargs: forwarded unchanged to the next class in the MRO.
        """
        # Copy so that instances never share (or mutate) a default/caller list.
        self.depends_on_tasks = list(depends_on_tasks) if depends_on_tasks is not None else []
        super().__init__(**kwargs)

    def _get_subdag_id_and_task_id(self, task_id):
        """
        Split a possibly dag-qualified task id into ``(dag_id, task_id)``.

        The dag id is the empty string when ``task_id`` contains no dot.

        Example:
          Task1 => ('', 'Task1')
          MainDag.SubDag1.Task1 => ('MainDag.SubDag1', 'Task1')
        """
        # Everything before the last dot is the dag id, the rest is the task id.
        dag_id, _, bare_task_id = task_id.rpartition('.')
        return dag_id, bare_task_id

    @provide_session
    def current_state(self, task_id, execution_date, session=None):
        """
        Get the state currently stored in the database for a task instance.

        :param task_id: task id, optionally prefixed with its dag id.
        :param execution_date: execution date of the task instance to look up.
        :param session: database session (supplied by ``provide_session``
            when omitted).
        :return: the state of the first matching task instance, or ``None``
            when no task instance matches.
        """
        dag_id, task_id = self._get_subdag_id_and_task_id(task_id)

        query = session.query(TaskInstance).filter(
            TaskInstance.task_id == task_id,
            TaskInstance.execution_date == execution_date,
        )

        # NOTE(review): without a dag prefix the lookup is not restricted to
        # any DAG, so a task with the same id and execution date in another
        # DAG can match -- confirm whether that is intended.
        if dag_id:
            query = query.filter(TaskInstance.dag_id == dag_id)

        # Only one row is needed; do not load every match.
        task_instance = query.first()
        return task_instance.state if task_instance is not None else None

    def pre_execute(self, context):
        """
        Skip this task unless every prerequisite task succeeded.

        :param context: task context; ``context['execution_date']`` selects
            the prerequisite task instances to check.
        :raises AirflowSkipException: if any task listed in
            ``depends_on_tasks`` is not in the SUCCESS state (this includes
            prerequisites for which no task instance was found).
        """
        execution_date = context['execution_date']
        unmet = [
            task_id
            for task_id in self.depends_on_tasks
            if self.current_state(task_id, execution_date) != State.SUCCESS
        ]

        if unmet:
            # Name the offending prerequisites so the skip is explainable from the log.
            raise AirflowSkipException(
                'Prerequisite task(s) did not succeed: {}'.format(', '.join(unmet))
            )

        # Keep the pre_execute hook of the wrapped operator working.
        super().pre_execute(context)
用法示例:

class MySkippableOperator(SkippableOperatorMixin, MyOperator):
    """MyOperator that is skipped when a task in ``depends_on_tasks`` did not succeed."""


task = MySkippableOperator(
    # ...plus every argument you would normally pass to MyOperator
    depends_on_tasks=['TaskId1', 'TaskId2'],
)

因此,我提出了这个mixin,它允许您在它所依赖的任何任务未成功时将任务状态设置为跳过:

from airflow.exceptions import AirflowSkipException
from airflow.models import TaskInstance
from airflow.utils.db import provide_session
from airflow.utils.state import State


class SkippableOperatorMixin:
    """
    Mixin that lets an Airflow operator skip itself when any of the tasks
    it depends on did not succeed.

    List it before the concrete operator in the base classes so that its
    ``__init__`` and ``pre_execute`` run first, e.g.
    ``class MySkippableOperator(SkippableOperatorMixin, MyOperator)``.
    """
    # Class-level fallback only; every instance gets its own list in __init__.
    depends_on_tasks = []

    def __init__(self, depends_on_tasks=None, **kwargs):
        """
        :param depends_on_tasks: ids of the tasks that must be in the SUCCESS
            state for this task to run. An id may be prefixed with a dag id
            (``'MainDag.SubDag1.Task1'``). Defaults to no prerequisites.
        :param kwargs: forwarded unchanged to the next class in the MRO.
        """
        # Copy so that instances never share (or mutate) a default/caller list.
        self.depends_on_tasks = list(depends_on_tasks) if depends_on_tasks is not None else []
        super().__init__(**kwargs)

    def _get_subdag_id_and_task_id(self, task_id):
        """
        Split a possibly dag-qualified task id into ``(dag_id, task_id)``.

        The dag id is the empty string when ``task_id`` contains no dot.

        Example:
          Task1 => ('', 'Task1')
          MainDag.SubDag1.Task1 => ('MainDag.SubDag1', 'Task1')
        """
        # Everything before the last dot is the dag id, the rest is the task id.
        dag_id, _, bare_task_id = task_id.rpartition('.')
        return dag_id, bare_task_id

    @provide_session
    def current_state(self, task_id, execution_date, session=None):
        """
        Get the state currently stored in the database for a task instance.

        :param task_id: task id, optionally prefixed with its dag id.
        :param execution_date: execution date of the task instance to look up.
        :param session: database session (supplied by ``provide_session``
            when omitted).
        :return: the state of the first matching task instance, or ``None``
            when no task instance matches.
        """
        dag_id, task_id = self._get_subdag_id_and_task_id(task_id)

        query = session.query(TaskInstance).filter(
            TaskInstance.task_id == task_id,
            TaskInstance.execution_date == execution_date,
        )

        # NOTE(review): without a dag prefix the lookup is not restricted to
        # any DAG, so a task with the same id and execution date in another
        # DAG can match -- confirm whether that is intended.
        if dag_id:
            query = query.filter(TaskInstance.dag_id == dag_id)

        # Only one row is needed; do not load every match.
        task_instance = query.first()
        return task_instance.state if task_instance is not None else None

    def pre_execute(self, context):
        """
        Skip this task unless every prerequisite task succeeded.

        :param context: task context; ``context['execution_date']`` selects
            the prerequisite task instances to check.
        :raises AirflowSkipException: if any task listed in
            ``depends_on_tasks`` is not in the SUCCESS state (this includes
            prerequisites for which no task instance was found).
        """
        execution_date = context['execution_date']
        unmet = [
            task_id
            for task_id in self.depends_on_tasks
            if self.current_state(task_id, execution_date) != State.SUCCESS
        ]

        if unmet:
            # Name the offending prerequisites so the skip is explainable from the log.
            raise AirflowSkipException(
                'Prerequisite task(s) did not succeed: {}'.format(', '.join(unmet))
            )

        # Keep the pre_execute hook of the wrapped operator working.
        super().pre_execute(context)
用法示例:

class MySkippableOperator(SkippableOperatorMixin, MyOperator):
    """MyOperator that is skipped when a task in ``depends_on_tasks`` did not succeed."""


task = MySkippableOperator(
    # ...plus every argument you would normally pass to MyOperator
    depends_on_tasks=['TaskId1', 'TaskId2'],
)