Python Can';t pickle<;类别';abc.类名'>;:abc上的属性查找类名失败

Python Can';t pickle<;类别';abc.类名'>;:abc上的属性查找类名失败,python,luigi,Python,Luigi,当我试图根据字典(“cmdList”)中定义的依赖关系创建依赖项(子任务)时,出现了上述错误。例如,“BDX010”是“BDX020”的依赖项。我使用的是Python 3.7 有关确切的错误消息,请参见底部的堆栈跟踪 import luigi from helpers import SQLTask import helpers import logging import time acctDate = '201904' ssisDate = '201905' runDesc0xx = 'pr

当我试图根据字典(“cmdList”)中定义的依赖关系创建依赖项(子任务)时,出现了上述错误。例如,“BDX010”是“BDX020”的依赖项。我使用的是Python 3.7

有关确切的错误消息,请参见底部的堆栈跟踪

import luigi
from helpers import SQLTask
import helpers
import logging 
import time

acctDate = '201904'
ssisDate = '201905'
runDesc0xx = 'prod period 4 test2'  
runDesc9xx = 'test2'  

YY = acctDate[:4]
MM = acctDate[4:6]


bdx_sql = 'r:\\1.SQL\\BDX_SQL\\'
cmdList = {
        'BDX010': (f'"{bdx_sql}BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 ',''),
        'BDX020': (f'"{bdx_sql}BDX_001_NI_DM 020.sql"  ','BDX010'),
        'BDX022a': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 010.sql"  -S LWVPDBSQLC070 ','BDX020'),
        'BDX022b': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 020.sql"  -S LWVPDBSQLC070  -v Year1={YY} MM={MM}','BDX022a'),
        'BDX022c': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 030.sql"  -v Year={YY} Month={MM}', 'BDX022b'),
    }

class BDX_Task(SQLTask):
    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter(default=None)
    queryKey = luigi.Parameter()
    queryCmd = luigi.Parameter()
    runDesc = luigi.Parameter()
    dependQry = luigi.Parameter()


    def __init__(self, *args, **kwargs):
        super(BDX_Task, self).__init__(*args, **kwargs)
        self.trans_id = f"00903_BDX_Query_{self.queryKey}__{self.runDesc}"

    def requires(self):
        cmdListComb = dict(cmdList)

        if self.dependQry != '' and self.dependQry in cmdListComb:
            dep_cmd, dep_dep_key = cmdListComb[self.dependQry]

            klass = globals()[self.dependQry]
            return [klass(         
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = self.dependQry,
                queryCmd = dep_cmd,
                runDesc = self.runDesc,
                dependQry = dep_dep_key
            )]
        else:
            return []

    def run(self):

        strQuery_and_args = f""" -i {self.queryCmd} """
        time.sleep(5)
        print(strQuery_and_args)
        self.get_target().touch()


class BDX_Query_0XX(SQLTask):
    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter()  
    runDesc = luigi.Parameter()


    def __init__(self, *args, **kwargs):
        super(BDX_Query_0XX, self).__init__(*args, **kwargs)

        self.trans_id =  "00902_BDX_Query_0XX" + "__" + self.runDesc  # static.


    def requires(self):
        for queryKey, (queryCmd, dependQry) in cmdList.items():
            klass = type(queryKey, (BDX_Task,),{})
            globals()[queryKey] = klass
            yield klass(
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = queryKey,
                queryCmd = queryCmd,
                runDesc = self.runDesc,  
                dependQry = dependQry
            )

    def run(self):
        self.get_target().touch()



class BDX_Query_Main(SQLTask):
    acctDate = luigi.Parameter(default=acctDate)
    ssisDate = luigi.Parameter(default=ssisDate)  # one month lag/later than acctDate
    # runDesc = globals().runDesc

    trans_id = "09000_Metaclass test" + "__" + runDesc9xx  # static.

    def requires(self):
        YY = self.acctDate[:4]
        MM = self.acctDate[4:6]
        acctDate = self.acctDate
        ssisDate = self.ssisDate

        return [BDX_Query_0XX( acctDate=self.acctDate, ssisDate = self.ssisDate, runDesc = runDesc0xx )
               ]

    def run(self):
        self.get_target().touch()


if __name__ == '__main__':
    luigi.run()
堆栈跟踪:

DEBUG: Checking if BDX_Query_Main(acctDate=201904, ssisDate=201905) is complete
DEBUG: Checking if BDX_Query_0XX(acctDate=201904, ssisDate=201905, runDesc=prod period 4 test2) is complete
INFO: Informed scheduler that task   BDX_Query_Main_201904_201905_444c47aebc   has status   PENDING
DEBUG: BDX_Task.__init__ called for queryKey ="BDX010"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX020"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022a"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022b"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022c"
DEBUG: Checking if BDX010(acctDate=201904, ssisDate=201905, queryKey=BDX010, queryCmd="r:\1.SQL\BDX_SQL\BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=) is complete
DEBUG: Checking if BDX020(acctDate=201904, ssisDate=201905, queryKey=BDX020, queryCmd="r:\1.SQL\BDX_SQL\BDX_001_NI_DM 020.sql"  , runDesc=prod period 4 test2, dependQry=BDX010) is complete
DEBUG: Checking if BDX022a(acctDate=201904, ssisDate=201905, queryKey=BDX022a, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 010.sql"  -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=BDX020) is complete
DEBUG: Checking if BDX022b(acctDate=201904, ssisDate=201905, queryKey=BDX022b, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 020.sql"  -S LWVPDBSQLC070  -v Year1=2019 MM=04, runDesc=prod period 4 test2, dependQry=BDX022a) is complete
DEBUG: Checking if BDX022c(acctDate=201904, ssisDate=201905, queryKey=BDX022c, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 030.sql"  -v Year=2019 Month=04, runDesc=prod period 4 test2, dependQry=BDX022b) is complete
INFO: Informed scheduler that task   BDX_Query_0XX_201904_prod_period_4_te_201905_73ccfa7be3   has status   PENDING
INFO: Informed scheduler that task   BDX022c_201904_BDX022b__r__1_SQL_BDX_SQ_5c6660ab25   has status   PENDING
INFO: Informed scheduler that task   BDX022b_201904_BDX022a__r__1_SQL_BDX_SQ_c0677e7954   has status   PENDING
INFO: Informed scheduler that task   BDX022a_201904_BDX020__r__1_SQL_BDX_SQ_784cf5b40a   has status   PENDING
INFO: Informed scheduler that task   BDX020_201904_BDX010__r__1_SQL_BDX_SQ_d37e4e46a2   has status   PENDING
INFO: Informed scheduler that task   BDX010_201904___r__1_SQL_BDX_SQ_9d353a8cd3   has status   PENDING
INFO: Done scheduling tasks
INFO: Running Worker with 5 processes
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 7
INFO: Worker Worker(salt=751624561, workers=5, host=LWVPWEACT001, username=i805649, pid=4108) was stopped. Shutting down Keep-Alive thread
Traceback (most recent call last):
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1664, in <module>
    main()
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1658, in main
    globals = debugger.run(setup['file'], None, None, is_module)
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1068, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "R:/1.PY/DataPipeLine/run_test.py", line 178, in <module>
    luigi.run()
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 192, in run
    return _run(*args, **kwargs)['success']
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 209, in _run
    return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 172, in _schedule_and_run
    success &= worker.run()
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 1184, in run
    self._run_task(get_work_response.task_id)
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 996, in _run_task
    task_process.start()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 112, in start
    self._popen = self._Popen(self)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
    return Popen(process_obj)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
**_pickle.PicklingError: Can't pickle <class 'abc.BDX010'>: attribute lookup BDX010 on abc failed**

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spawn_main
    exitcode = _main(fd)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 115, in _main
    self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
DEBUG:检查BDX\u Query\u Main(acctDate=201904,ssisDate=201905)是否完成
调试:检查BDX_Query_0XX(acctDate=201904,SSIDATE=201905,runDesc=prod period 4 test2)是否完成
信息:通知调度程序任务BDX_Query_Main_201904_201905_444c47aebc的状态为挂起
调试:BDX_任务。为queryKey=“BDX010”调用了\uuuu init_uuuuuu
调试:BDX\U任务。为queryKey=“BDX020”调用了\uuuu init\uuuuuuu
调试:BDX_任务。为queryKey=“BDX022a”调用了\uuuu init_uuuuuu
调试:BDX_任务。为queryKey=“BDX022b”调用了
调试:BDX_任务。为queryKey=“BDX022c”调用了\uuuu init
调试:检查BDX010(acctDate=201904,SSIDATE=201905,queryKey=BDX010,queryCmd=“r:\1.SQL\BDX\u SQL\BDX\u 001\u NI\u DM 010.SQL”-S LWVPDBSQLC070,runDesc=prod period 4 test2,dependQry=)是否完成
调试:检查BDX020(acctDate=201904,SSIDATE=201905,queryKey=BDX020,queryCmd=“r:\1.SQL\BDX\u SQL\BDX\u 001\u NI\u DM 020.SQL”,runDesc=prod period 4 test2,dependQry=BDX010)是否完成
调试:检查BDX022a(acctDate=201904,SSIDATE=201905,queryKey=BDX022a,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 010.SQL”-S LWVPDBSQLC070,runDesc=prod period 4 test2,dependQry=BDX020)是否完成
调试:检查BDX022b(acctDate=201904,SSIDATE=201905,queryKey=BDX022b,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 020.SQL”-S LWVPDBSQLC070-v Year1=2019 MM=04,RUNDDESC=prod period 4 test2,dependQry=BDX022a)是否完成
调试:检查BDX022c(acctDate=201904,SSIDATE=201905,queryKey=BDX022c,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 030.SQL”-v Year=2019 MOUNT=04,RUNDDESC=prod PERTIOND 4 test2,dependQry=BDX022b)是否完成
信息:通知调度程序任务BDX_查询_0XX_201904_生产期_4_te_201905_73ccfa7be3的状态为挂起
信息:通知调度程序任务BDX022c\U 201904\U BDX022b\U r\U 1\U SQL\U BDX\U SQ\U 5c6660ab25的状态为挂起
信息:通知调度程序任务BDX022b\U 201904\U BDX022a\U r\U 1\U SQL\U BDX\U SQ\U c0677e7954的状态为挂起
信息:通知计划程序任务BDX022a\U 201904\U BDX020\U r\U 1\U SQL\U BDX\U SQ\U 784cf5b40a的状态为挂起
信息:通知调度程序任务BDX020\U 201904\U BDX010\U r\U 1\U SQL\U BDX\U SQ\U D37E46A2的状态为挂起
信息:通知调度程序任务BDX010\u 201904\u\u r\u\u 1\u SQL\u BDX\u SQ\u 9d353a8cd3的状态为挂起
信息:已完成计划任务
信息:使用5个进程运行Worker
调试:请求调度程序进行工作。。。
调试:挂起的任务:7
信息:工作线程(salt=751624561,工作线程=5,主机=LWVPWEACT001,用户名=i805649,pid=4108)已停止。正在关闭保持活动线程
回溯(最近一次呼叫最后一次):
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1664行,在
main()
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1658行,主目录
globals=debugger.run(setup['file'],None,None,is_模块)
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1068行,正在运行
pydev_imports.execfile(文件、全局、局部)#执行脚本
文件“C:\Program Files\JetBrains\PyCharm社区版2018.2.4\helpers\pydev\\u pydev\u imps\\u pydev\u execfile.py”,第18行,在execfile中
exec(编译(内容+“\n”,文件,'exec'),全局,loc)
文件“R:/1.PY/DataPipeLine/run_test.PY”,第178行,在
luigi.run()
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第192行,正在运行
返回_run(*args,**kwargs)[“成功”]
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第209行,正在运行
返回_schedule_和_run([cp.get_task_obj()],worker_scheduler_工厂)
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第172行,在\u schedule\u和\u run中
成功&=worker.run()
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\worker.py”,第1184行,正在运行
self.\u run\u task(获取\u work\u response.task\u id)
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\worker.py”,第996行,在运行任务中
task_process.start()
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\process.py”,第112行,在开始处
self.\u popen=self.\u popen(self)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\context.py”,第223行,在\u Popen中
返回_default_context.get_context().Process._Popen(Process_obj)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\context.py”,第322行,在\u Popen中
返回Popen(过程对象)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\popen\u spawn\u win32.py”,第65行,在\uuu init中__
减少.转储(进程对象,到子进程)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\reduce.py”,第60行,在转储中
ForkingPickler(文件、协议).dump(obj)
**_pickle.PicklingError:无法pickle:abc上的属性查找BDX010失败**
回溯(最近一次呼叫最后一次):
文件“”,第1行,在
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py”,第105行,在spawn\u main中
出口代码=_主(fd)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py”,第115行,在_main中
self=reduce.pickle.load(从父级)
EOFError:输入不足

当使用ABC的元类动态创建类时,模块变为ABC,当工作人员尝试查找任务时,它会转到抽象基类模块并尝试在那里查找,但它当然不存在

要解决这个问题,请确保luigi知道在哪里可以找到构建类的代码,方法是手动重置
\uuuu module\uuu
变量

将行更改为:

klass = type(queryKey, (BDX_Task,),{'__module__':__name__})
据我所知,这只是个问题
# Run this first outside any other logic so it gets run if someone imports the module:
for queryKey in cmdList.keys():
    globals()[queryKey] = type(queryKey,(BDX_Task,){'__module__':__name__})

#Then you requires function can look like:
class BDX_Query_0XX(SQLTask):

    # ...

    def requires(self):
        for queryKey, (queryCmd, dependQry) in cmdList.items():
            yield globals()[queryKey](
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = queryKey,
                queryCmd = queryCmd,
                runDesc = self.runDesc,  
                dependQry = dependQry
            )