Python Can';t pickle<;类别';abc.类名'>;:abc上的属性查找类名失败
当我试图根据字典(“cmdList”)中定义的依赖关系创建依赖项(子任务)时,出现了上述错误。例如,“BDX010”是“BDX020”的依赖项。我使用的是Python 3.7 有关确切的错误消息,请参见底部的堆栈跟踪Python Can';t pickle<;类别';abc.类名'>;:abc上的属性查找类名失败,python,luigi,Python,Luigi,当我试图根据字典(“cmdList”)中定义的依赖关系创建依赖项(子任务)时,出现了上述错误。例如,“BDX010”是“BDX020”的依赖项。我使用的是Python 3.7 有关确切的错误消息,请参见底部的堆栈跟踪 import luigi from helpers import SQLTask import helpers import logging import time acctDate = '201904' ssisDate = '201905' runDesc0xx = 'pr
import luigi
from helpers import SQLTask
import helpers
import logging
import time
acctDate = '201904'
ssisDate = '201905'
runDesc0xx = 'prod period 4 test2'
runDesc9xx = 'test2'
YY = acctDate[:4]
MM = acctDate[4:6]
bdx_sql = 'r:\\1.SQL\\BDX_SQL\\'
cmdList = {
'BDX010': (f'"{bdx_sql}BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 ',''),
'BDX020': (f'"{bdx_sql}BDX_001_NI_DM 020.sql" ','BDX010'),
'BDX022a': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 010.sql" -S LWVPDBSQLC070 ','BDX020'),
'BDX022b': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 020.sql" -S LWVPDBSQLC070 -v Year1={YY} MM={MM}','BDX022a'),
'BDX022c': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 030.sql" -v Year={YY} Month={MM}', 'BDX022b'),
}
class BDX_Task(SQLTask):
acctDate = luigi.Parameter()
ssisDate = luigi.Parameter(default=None)
queryKey = luigi.Parameter()
queryCmd = luigi.Parameter()
runDesc = luigi.Parameter()
dependQry = luigi.Parameter()
def __init__(self, *args, **kwargs):
super(BDX_Task, self).__init__(*args, **kwargs)
self.trans_id = f"00903_BDX_Query_{self.queryKey}__{self.runDesc}"
def requires(self):
cmdListComb = dict(cmdList)
if self.dependQry != '' and self.dependQry in cmdListComb:
dep_cmd, dep_dep_key = cmdListComb[self.dependQry]
klass = globals()[self.dependQry]
return [klass(
acctDate = self.acctDate,
ssisDate = self.ssisDate,
queryKey = self.dependQry,
queryCmd = dep_cmd,
runDesc = self.runDesc,
dependQry = dep_dep_key
)]
else:
return []
def run(self):
strQuery_and_args = f""" -i {self.queryCmd} """
time.sleep(5)
print(strQuery_and_args)
self.get_target().touch()
class BDX_Query_0XX(SQLTask):
acctDate = luigi.Parameter()
ssisDate = luigi.Parameter()
runDesc = luigi.Parameter()
def __init__(self, *args, **kwargs):
super(BDX_Query_0XX, self).__init__(*args, **kwargs)
self.trans_id = "00902_BDX_Query_0XX" + "__" + self.runDesc # static.
def requires(self):
for queryKey, (queryCmd, dependQry) in cmdList.items():
klass = type(queryKey, (BDX_Task,),{})
globals()[queryKey] = klass
yield klass(
acctDate = self.acctDate,
ssisDate = self.ssisDate,
queryKey = queryKey,
queryCmd = queryCmd,
runDesc = self.runDesc,
dependQry = dependQry
)
def run(self):
self.get_target().touch()
class BDX_Query_Main(SQLTask):
acctDate = luigi.Parameter(default=acctDate)
ssisDate = luigi.Parameter(default=ssisDate) # one month lag/later than acctDate
# runDesc = globals().runDesc
trans_id = "09000_Metaclass test" + "__" + runDesc9xx # static.
def requires(self):
YY = self.acctDate[:4]
MM = self.acctDate[4:6]
acctDate = self.acctDate
ssisDate = self.ssisDate
return [BDX_Query_0XX( acctDate=self.acctDate, ssisDate = self.ssisDate, runDesc = runDesc0xx )
]
def run(self):
self.get_target().touch()
if __name__ == '__main__':
luigi.run()
堆栈跟踪:
DEBUG: Checking if BDX_Query_Main(acctDate=201904, ssisDate=201905) is complete
DEBUG: Checking if BDX_Query_0XX(acctDate=201904, ssisDate=201905, runDesc=prod period 4 test2) is complete
INFO: Informed scheduler that task BDX_Query_Main_201904_201905_444c47aebc has status PENDING
DEBUG: BDX_Task.__init__ called for queryKey ="BDX010"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX020"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022a"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022b"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022c"
DEBUG: Checking if BDX010(acctDate=201904, ssisDate=201905, queryKey=BDX010, queryCmd="r:\1.SQL\BDX_SQL\BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=) is complete
DEBUG: Checking if BDX020(acctDate=201904, ssisDate=201905, queryKey=BDX020, queryCmd="r:\1.SQL\BDX_SQL\BDX_001_NI_DM 020.sql" , runDesc=prod period 4 test2, dependQry=BDX010) is complete
DEBUG: Checking if BDX022a(acctDate=201904, ssisDate=201905, queryKey=BDX022a, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 010.sql" -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=BDX020) is complete
DEBUG: Checking if BDX022b(acctDate=201904, ssisDate=201905, queryKey=BDX022b, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 020.sql" -S LWVPDBSQLC070 -v Year1=2019 MM=04, runDesc=prod period 4 test2, dependQry=BDX022a) is complete
DEBUG: Checking if BDX022c(acctDate=201904, ssisDate=201905, queryKey=BDX022c, queryCmd="r:\1.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 030.sql" -v Year=2019 Month=04, runDesc=prod period 4 test2, dependQry=BDX022b) is complete
INFO: Informed scheduler that task BDX_Query_0XX_201904_prod_period_4_te_201905_73ccfa7be3 has status PENDING
INFO: Informed scheduler that task BDX022c_201904_BDX022b__r__1_SQL_BDX_SQ_5c6660ab25 has status PENDING
INFO: Informed scheduler that task BDX022b_201904_BDX022a__r__1_SQL_BDX_SQ_c0677e7954 has status PENDING
INFO: Informed scheduler that task BDX022a_201904_BDX020__r__1_SQL_BDX_SQ_784cf5b40a has status PENDING
INFO: Informed scheduler that task BDX020_201904_BDX010__r__1_SQL_BDX_SQ_d37e4e46a2 has status PENDING
INFO: Informed scheduler that task BDX010_201904___r__1_SQL_BDX_SQ_9d353a8cd3 has status PENDING
INFO: Done scheduling tasks
INFO: Running Worker with 5 processes
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 7
INFO: Worker Worker(salt=751624561, workers=5, host=LWVPWEACT001, username=i805649, pid=4108) was stopped. Shutting down Keep-Alive thread
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1664, in <module>
main()
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1658, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1068, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "R:/1.PY/DataPipeLine/run_test.py", line 178, in <module>
luigi.run()
File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 192, in run
return _run(*args, **kwargs)['success']
File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 209, in _run
return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 172, in _schedule_and_run
success &= worker.run()
File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 1184, in run
self._run_task(get_work_response.task_id)
File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 996, in _run_task
task_process.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
**_pickle.PicklingError: Can't pickle <class 'abc.BDX010'>: attribute lookup BDX010 on abc failed**
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
DEBUG:检查BDX\u Query\u Main(acctDate=201904,ssisDate=201905)是否完成
调试:检查BDX_Query_0XX(acctDate=201904,SSIDATE=201905,runDesc=prod period 4 test2)是否完成
信息:通知调度程序任务BDX_Query_Main_201904_201905_444c47aebc的状态为挂起
调试:BDX_任务。为queryKey=“BDX010”调用了\uuuu init_uuuuuu
调试:BDX\U任务。为queryKey=“BDX020”调用了\uuuu init\uuuuuuu
调试:BDX_任务。为queryKey=“BDX022a”调用了\uuuu init_uuuuuu
调试:BDX_任务。为queryKey=“BDX022b”调用了
调试:BDX_任务。为queryKey=“BDX022c”调用了\uuuu init
调试:检查BDX010(acctDate=201904,SSIDATE=201905,queryKey=BDX010,queryCmd=“r:\1.SQL\BDX\u SQL\BDX\u 001\u NI\u DM 010.SQL”-S LWVPDBSQLC070,runDesc=prod period 4 test2,dependQry=)是否完成
调试:检查BDX020(acctDate=201904,SSIDATE=201905,queryKey=BDX020,queryCmd=“r:\1.SQL\BDX\u SQL\BDX\u 001\u NI\u DM 020.SQL”,runDesc=prod period 4 test2,dependQry=BDX010)是否完成
调试:检查BDX022a(acctDate=201904,SSIDATE=201905,queryKey=BDX022a,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 010.SQL”-S LWVPDBSQLC070,runDesc=prod period 4 test2,dependQry=BDX020)是否完成
调试:检查BDX022b(acctDate=201904,SSIDATE=201905,queryKey=BDX022b,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 020.SQL”-S LWVPDBSQLC070-v Year1=2019 MM=04,RUNDDESC=prod period 4 test2,dependQry=BDX022a)是否完成
调试:检查BDX022c(acctDate=201904,SSIDATE=201905,queryKey=BDX022c,queryCmd=“r:\1.SQL\BDX\U SQL\BDX\U 022\U P038\U All\U Final\U CatAdj 030.SQL”-v Year=2019 MOUNT=04,RUNDDESC=prod PERTIOND 4 test2,dependQry=BDX022b)是否完成
信息:通知调度程序任务BDX_查询_0XX_201904_生产期_4_te_201905_73ccfa7be3的状态为挂起
信息:通知调度程序任务BDX022c\U 201904\U BDX022b\U r\U 1\U SQL\U BDX\U SQ\U 5c6660ab25的状态为挂起
信息:通知调度程序任务BDX022b\U 201904\U BDX022a\U r\U 1\U SQL\U BDX\U SQ\U c0677e7954的状态为挂起
信息:通知计划程序任务BDX022a\U 201904\U BDX020\U r\U 1\U SQL\U BDX\U SQ\U 784cf5b40a的状态为挂起
信息:通知调度程序任务BDX020\U 201904\U BDX010\U r\U 1\U SQL\U BDX\U SQ\U D37E46A2的状态为挂起
信息:通知调度程序任务BDX010\u 201904\u\u r\u\u 1\u SQL\u BDX\u SQ\u 9d353a8cd3的状态为挂起
信息:已完成计划任务
信息:使用5个进程运行Worker
调试:请求调度程序进行工作。。。
调试:挂起的任务:7
信息:工作线程(salt=751624561,工作线程=5,主机=LWVPWEACT001,用户名=i805649,pid=4108)已停止。正在关闭保持活动线程
回溯(最近一次呼叫最后一次):
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1664行,在
main()
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1658行,主目录
globals=debugger.run(setup['file'],None,None,is_模块)
文件“C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py”,第1068行,正在运行
pydev_imports.execfile(文件、全局、局部)#执行脚本
文件“C:\Program Files\JetBrains\PyCharm社区版2018.2.4\helpers\pydev\\u pydev\u imps\\u pydev\u execfile.py”,第18行,在execfile中
exec(编译(内容+“\n”,文件,'exec'),全局,loc)
文件“R:/1.PY/DataPipeLine/run_test.PY”,第178行,在
luigi.run()
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第192行,正在运行
返回_run(*args,**kwargs)[“成功”]
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第209行,正在运行
返回_schedule_和_run([cp.get_task_obj()],worker_scheduler_工厂)
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\interface.py”,第172行,在\u schedule\u和\u run中
成功&=worker.run()
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\worker.py”,第1184行,正在运行
self.\u run\u task(获取\u work\u response.task\u id)
文件“C:\ProgramData\Anaconda3\lib\site packages\luigi\worker.py”,第996行,在运行任务中
task_process.start()
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\process.py”,第112行,在开始处
self.\u popen=self.\u popen(self)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\context.py”,第223行,在\u Popen中
返回_default_context.get_context().Process._Popen(Process_obj)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\context.py”,第322行,在\u Popen中
返回Popen(过程对象)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\popen\u spawn\u win32.py”,第65行,在\uuu init中__
减少.转储(进程对象,到子进程)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\reduce.py”,第60行,在转储中
ForkingPickler(文件、协议).dump(obj)
**_pickle.PicklingError:无法pickle:abc上的属性查找BDX010失败**
回溯(最近一次呼叫最后一次):
文件“”,第1行,在
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py”,第105行,在spawn\u main中
出口代码=_主(fd)
文件“C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py”,第115行,在_main中
self=reduce.pickle.load(从父级)
EOFError:输入不足
当使用ABC的元类动态创建类时,模块变为ABC,当工作人员尝试查找任务时,它会转到抽象基类模块并尝试在那里查找,但它当然不存在
要解决这个问题,请确保luigi知道在哪里可以找到构建类的代码,方法是手动重置\uuuu module\uuu
变量
将行更改为:
klass = type(queryKey, (BDX_Task,),{'__module__':__name__})
据我所知,这只是个问题
# Run this first outside any other logic so it gets run if someone imports the module:
for queryKey in cmdList.keys():
globals()[queryKey] = type(queryKey,(BDX_Task,){'__module__':__name__})
#Then you requires function can look like:
class BDX_Query_0XX(SQLTask):
# ...
def requires(self):
for queryKey, (queryCmd, dependQry) in cmdList.items():
yield globals()[queryKey](
acctDate = self.acctDate,
ssisDate = self.ssisDate,
queryKey = queryKey,
queryCmd = queryCmd,
runDesc = self.runDesc,
dependQry = dependQry
)