Python 3.x AttributeError: 'PathDistribution' 对象没有属性 'name'

Python 3.x AttributeError: 'PathDistribution' 对象没有属性 'name',python-3.x,celery,spacy,celery-task,Python 3.x,Celery,Spacy,Celery Task,我正在尝试使用 Celery 并参照文档来运行一个简单的工作流。我使用 chain 按顺序运行任务,并执行以下工作流 提取一个文件,将其标记化,并将文档的语句标记的JSON转储加载到另一个(新)文件中。在文件夹中的文件列表上迭代工作流 以下是我的代码:- 文件夹结构 celery-pipeline/ ├── celeryapp.py ├── celeryconfig.py ├── data/ ├── output/ └── tasks.py celeryapp.py from celery import Cele

我正在尝试使用 Celery 并参照文档来运行一个简单的工作流。我使用 chain 按顺序运行任务,并执行以下工作流:

提取一个文件,对其进行分词(tokenize),并将文档按句子切分后的结果以 JSON 形式转储写入另一个(新)文件中。对文件夹中的文件列表逐个执行该工作流。

以下是我的代码:-

文件夹结构

celery-pipeline/
├── celeryapp.py
├── celeryconfig.py
├── data/
├── output/
└── tasks.py
celeryapp.py

from celery import Celery

# Create the Celery application object (no constructor arguments are given).
app = Celery()
# Pull settings from the module named 'celeryconfig'.
app.config_from_object('celeryconfig')
# NOTE(review): the assignments below look like the body of celeryconfig.py
# pasted directly after celeryapp.py — confirm which file they belong to.
imports = ('tasks',)
# Redis on localhost:6379, database 0.
broker_url =  'redis://localhost:6379/0'
# PostgreSQL database 'celery_db' on 127.0.0.1:5432; credentials are inline.
result_backend = 'db+postgresql://celery_user:celery_user@127.0.0.1:5432/celery_db'

task_ignore_result = False
task_track_started = True
task_default_queue = 'default'
task_default_rate_limit = '20/s'
# presumably seconds (7200 s = 2 h) — confirm against the Celery settings reference
task_time_limit = 7200
worker_pool_restarts = True
import os
import json
import spacy
import logging
from datetime import datetime, timedelta

from celeryapp import app

# Load the spaCy 'en_core_web_sm' pipeline once at import time; the tasks below reuse it.
sp = spacy.load('en_core_web_sm')

@app.task(bind=True)
def extract(self, filename):
    """Read a file from the ``data`` directory and return its text.

    Args:
        filename: Name of the file, resolved against ``<cwd>/data``.

    Returns:
        The complete contents of the file as a string.
    """
    file_path = os.path.join(os.getcwd(), 'data', filename)
    # The context manager closes the handle even if read() raises; the
    # previous bare open(...).read() left the file open until collected.
    with open(file_path) as source:
        doc = source.read()
    print('Extract called')
    return doc

@app.task(bind=True)
def transform_tokenize_doc(self, doc:str):
    """Split *doc* into sentences using the module-level pipeline ``sp``.

    Args:
        doc: Text to run through ``sp``.

    Returns:
        A list holding one whitespace-stripped string per item of
        ``sp(doc).sents``, in iteration order.
    """
    return [str(sentence).strip() for sentence in sp(doc).sents]

@app.task(bind=True)
def load(self, filename, *args):
    """Append the JSON dump of ``args`` to ``<cwd>/output/<filename>``.

    Args:
        filename: Name of the target file inside the ``output`` directory.
        *args: Values serialised together (as one JSON array, indent 4).

    NOTE(review): when this task is linked in a chain as ``load.s(filename)``,
    Celery prepends the parent task's return value to the positional
    arguments, so ``filename`` may receive the parent result instead of the
    file name — confirm against the caller.
    """
    target = os.path.join(os.getcwd(), 'output', filename)
    with open(target, 'a+') as sink:
        payload = json.dumps(args, indent=4)
        sink.write(payload)


if __name__ == '__main__':
    # Queue one extract -> tokenize -> load chain for each of (at most) the
    # first ten entries of ./data, then wait on every chain's result.
    data_dir = os.path.join(os.getcwd(), 'data')
    pending = []

    for filename in os.listdir(data_dir)[:10]:
        print(f'filename is {filename}')
        pipeline = extract.s(filename) | transform_tokenize_doc.s() | load.s(filename)
        pending.append(pipeline.apply_async())

    for outcome in pending:
        outcome.get()
celeryconfig.py

from celery import Celery

# Create the Celery application object (no constructor arguments are given).
app = Celery()
# Pull settings from the module named 'celeryconfig'.
app.config_from_object('celeryconfig')
# NOTE(review): the assignments below look like the body of celeryconfig.py
# pasted directly after celeryapp.py — confirm which file they belong to.
imports = ('tasks',)
# Redis on localhost:6379, database 0.
broker_url =  'redis://localhost:6379/0'
# PostgreSQL database 'celery_db' on 127.0.0.1:5432; credentials are inline.
result_backend = 'db+postgresql://celery_user:celery_user@127.0.0.1:5432/celery_db'

task_ignore_result = False
task_track_started = True
task_default_queue = 'default'
task_default_rate_limit = '20/s'
# presumably seconds (7200 s = 2 h) — confirm against the Celery settings reference
task_time_limit = 7200
worker_pool_restarts = True
import os
import json
import spacy
import logging
from datetime import datetime, timedelta

from celeryapp import app

# Load the spaCy 'en_core_web_sm' pipeline once at import time; the tasks below reuse it.
sp = spacy.load('en_core_web_sm')

@app.task(bind=True)
def extract(self, filename):
    """Read a file from the ``data`` directory and return its text.

    Args:
        filename: Name of the file, resolved against ``<cwd>/data``.

    Returns:
        The complete contents of the file as a string.
    """
    file_path = os.path.join(os.getcwd(), 'data', filename)
    # The context manager closes the handle even if read() raises; the
    # previous bare open(...).read() left the file open until collected.
    with open(file_path) as source:
        doc = source.read()
    print('Extract called')
    return doc

@app.task(bind=True)
def transform_tokenize_doc(self, doc:str):
    """Split *doc* into sentences using the module-level pipeline ``sp``.

    Args:
        doc: Text to run through ``sp``.

    Returns:
        A list holding one whitespace-stripped string per item of
        ``sp(doc).sents``, in iteration order.
    """
    return [str(sentence).strip() for sentence in sp(doc).sents]

@app.task(bind=True)
def load(self, filename, *args):
    """Append the JSON dump of ``args`` to ``<cwd>/output/<filename>``.

    Args:
        filename: Name of the target file inside the ``output`` directory.
        *args: Values serialised together (as one JSON array, indent 4).

    NOTE(review): when this task is linked in a chain as ``load.s(filename)``,
    Celery prepends the parent task's return value to the positional
    arguments, so ``filename`` may receive the parent result instead of the
    file name — confirm against the caller.
    """
    target = os.path.join(os.getcwd(), 'output', filename)
    with open(target, 'a+') as sink:
        payload = json.dumps(args, indent=4)
        sink.write(payload)


if __name__ == '__main__':
    # Queue one extract -> tokenize -> load chain for each of (at most) the
    # first ten entries of ./data, then wait on every chain's result.
    data_dir = os.path.join(os.getcwd(), 'data')
    pending = []

    for filename in os.listdir(data_dir)[:10]:
        print(f'filename is {filename}')
        pipeline = extract.s(filename) | transform_tokenize_doc.s() | load.s(filename)
        pending.append(pipeline.apply_async())

    for outcome in pending:
        outcome.get()
tasks.py

from celery import Celery

# Create the Celery application object (no constructor arguments are given).
app = Celery()
# Pull settings from the module named 'celeryconfig'.
app.config_from_object('celeryconfig')
# NOTE(review): the assignments below look like the body of celeryconfig.py
# pasted directly after celeryapp.py — confirm which file they belong to.
imports = ('tasks',)
# Redis on localhost:6379, database 0.
broker_url =  'redis://localhost:6379/0'
# PostgreSQL database 'celery_db' on 127.0.0.1:5432; credentials are inline.
result_backend = 'db+postgresql://celery_user:celery_user@127.0.0.1:5432/celery_db'

task_ignore_result = False
task_track_started = True
task_default_queue = 'default'
task_default_rate_limit = '20/s'
# presumably seconds (7200 s = 2 h) — confirm against the Celery settings reference
task_time_limit = 7200
worker_pool_restarts = True
import os
import json
import spacy
import logging
from datetime import datetime, timedelta

from celeryapp import app

# Load the spaCy 'en_core_web_sm' pipeline once at import time; the tasks below reuse it.
sp = spacy.load('en_core_web_sm')

@app.task(bind=True)
def extract(self, filename):
    """Read a file from the ``data`` directory and return its text.

    Args:
        filename: Name of the file, resolved against ``<cwd>/data``.

    Returns:
        The complete contents of the file as a string.
    """
    file_path = os.path.join(os.getcwd(), 'data', filename)
    # The context manager closes the handle even if read() raises; the
    # previous bare open(...).read() left the file open until collected.
    with open(file_path) as source:
        doc = source.read()
    print('Extract called')
    return doc

@app.task(bind=True)
def transform_tokenize_doc(self, doc:str):
    """Split *doc* into sentences using the module-level pipeline ``sp``.

    Args:
        doc: Text to run through ``sp``.

    Returns:
        A list holding one whitespace-stripped string per item of
        ``sp(doc).sents``, in iteration order.
    """
    return [str(sentence).strip() for sentence in sp(doc).sents]

@app.task(bind=True)
def load(self, filename, *args):
    """Append the JSON dump of ``args`` to ``<cwd>/output/<filename>``.

    Args:
        filename: Name of the target file inside the ``output`` directory.
        *args: Values serialised together (as one JSON array, indent 4).

    NOTE(review): when this task is linked in a chain as ``load.s(filename)``,
    Celery prepends the parent task's return value to the positional
    arguments, so ``filename`` may receive the parent result instead of the
    file name — confirm against the caller.
    """
    target = os.path.join(os.getcwd(), 'output', filename)
    with open(target, 'a+') as sink:
        payload = json.dumps(args, indent=4)
        sink.write(payload)


if __name__ == '__main__':
    # Queue one extract -> tokenize -> load chain for each of (at most) the
    # first ten entries of ./data, then wait on every chain's result.
    data_dir = os.path.join(os.getcwd(), 'data')
    pending = []

    for filename in os.listdir(data_dir)[:10]:
        print(f'filename is {filename}')
        pipeline = extract.s(filename) | transform_tokenize_doc.s() | load.s(filename)
        pending.append(pipeline.apply_async())

    for outcome in pending:
        outcome.get()
在根文件夹 celery-pipeline/ 内运行 celery -A tasks worker --loglevel=info 时,我遇到以下错误:

Traceback (most recent call last):
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/objects.py", line 41, in __get__
    return obj.__dict__[self.__name__]
KeyError: 'control'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ubuntu/Documents/projects/celery-venv/bin/celery", line 8, in <module>
    sys.exit(main())
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/__main__.py", line 15, in main
    sys.exit(_main())
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/celery.py", line 213, in main
    return celery(auto_envvar_prefix="CELERY")
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py", line 829, in __call__
    return self.main(*args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py", line 782, in main
    rv = self.invoke(ctx)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py", line 1259, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py", line 1066, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py", line 610, in invoke
    return callback(*args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/decorators.py", line 21, in new_func
    return f(get_current_context(), *args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/base.py", line 132, in caller
    return f(ctx, *args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/worker.py", line 326, in worker
    **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/worker.py", line 99, in __init__
    self.setup_instance(**self.prepare_args(**kwargs))
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/worker.py", line 139, in setup_instance
    self.blueprint.apply(self, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py", line 211, in apply
    step.include(parent)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py", line 379, in include
    inc, ret = self._should_include(parent)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py", line 335, in _should_include
    return True, self.create(parent)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/components.py", line 238, in create
    prefetch_multiplier=w.prefetch_multiplier,
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py", line 331, in instantiate
    return instantiate(name, *args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/utils/imports.py", line 44, in instantiate
    return symbol_by_name(name)(*args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/consumer/consumer.py", line 212, in __init__
    self.blueprint.apply(self, **dict(worker_options or {}, **kwargs))
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py", line 205, in apply
    step = S(parent, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/consumer/control.py", line 25, in __init__
    self.box = (pidbox.gPidbox if self.is_green else pidbox.Pidbox)(c)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/pidbox.py", line 28, in __init__
    self.node = c.app.control.mailbox.Node(
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/objects.py", line 43, in __get__
    value = obj.__dict__[self.__name__] = self.__get(obj)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/app/base.py", line 1230, in control
    return instantiate(self.control_cls, app=self)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/utils/imports.py", line 44, in instantiate
    return symbol_by_name(name)(*args, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/imports.py", line 56, in symbol_by_name
    module = imp(module_name, package=package, **kwargs)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/importlib/__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 994, in _gcd_import
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/app/control.py", line 9, in <module>
    from kombu.matcher import match
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/matcher.py", line 132, in <module>
    for ep, args in entrypoints('kombu.matchers'):
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/compat.py", line 93, in entrypoints
    for ep in importlib_metadata.entry_points().get(namespace, [])
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/importlib_metadata/__init__.py", line 865, in entry_points
    return SelectableGroups.load(eps).select(**params)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/importlib_metadata/__init__.py", line 340, in load
    ordered = sorted(eps, key=by_group)
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/importlib_metadata/__init__.py", line 863, in <genexpr>
    dist.entry_points for dist in unique(distributions())
  File "/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/importlib_metadata/_itertools.py", line 16, in unique_everseen
    k = key(element)
AttributeError: 'PathDistribution' object has no attribute 'name'
回溯(最近一次调用在最后):
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/objects.py”,第41行,在 __get__ 中
return obj.__dict__[self.__name__]
KeyError: 'control'
在处理上述异常期间,发生了另一个异常:
回溯(最近一次调用在最后):
文件“/home/ubuntu/Documents/projects/celery-venv/bin/celery”,第8行,在 <module> 中
sys.exit(main())
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/__main__.py”,第15行,在 main 中
sys.exit(_main())
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/celery.py”,第213行,在 main 中
return celery(auto_envvar_prefix="CELERY")
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py”,第829行,在调用中__
返回self.main(*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py”,第782行,在主目录中
rv=自调用(ctx)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py”,调用中的第1259行
返回进程结果(sub_ctx.command.invoke(sub_ctx))
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py”,调用中的第1066行
返回ctx.invoke(self.callback,**ctx.params)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/core.py”,调用中第610行
返回回调(*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/click/decorators.py”,第21行,在new_-func中
返回f(获取当前上下文(),*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/base.py”,第132行,调用者
返回f(ctx,*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bin/worker.py”,第326行,在worker中
**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/worker.py”,第99行,在__
self.setup_实例(**self.prepare_参数(**kwargs))
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/worker.py”,第139行,在setup_实例中
self.blueprint.apply(self,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py”,第211行,在apply中
步骤.包括(父级)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py”,第379行,包含
inc,ret=self.\u应包括(母公司)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py”,第335行,应包括
返回True,self.create(父级)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/components.py”,第238行,在create中
预回迁乘数=w.预回迁乘数,
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py”,第331行,在实例化中
返回实例化(名称,*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/utils/imports.py”,实例化中第44行
返回符号按名称(名称)(*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/consumer/consumer.py”,第212行,在__
self.blueprint.apply(self,**dict(worker_选项或{},**kwargs))
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/bootsteps.py”,第205行,在apply中
步骤=S(父级,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/consumer/control.py”,第25行,在 __init__ 中
self.box = (pidbox.gPidbox if self.is_green else pidbox.Pidbox)(c)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/worker/pidbox.py”,第28行,在 __init__ 中
self.node = c.app.control.mailbox.Node(
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/objects.py”,第43行,在 __get__ 中
value = obj.__dict__[self.__name__] = self.__get(obj)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/app/base.py”,第1230行,处于控制状态
返回实例化(self.control\u cls,app=self)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/celery/utils/imports.py”,实例化中第44行
返回符号按名称(名称)(*args,**kwargs)
文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/site-packages/kombu/utils/imports.py”,第56行,符号为“按名称”
模块=imp(模块名称,包=包,**kwargs)
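文件“/home/ubuntu/Documents/projects/celery-venv/lib/python3.6/importlib/__init__.py”,第126行,在 import_module 中
return _bootstrap._gcd_import(name[level:], package, level)
文件“<frozen importlib._bootstrap>”,第994行,在 _gcd_import 中
文件“<frozen importlib._bootstrap>”,第971行,在 _find_and_load 中
文件“<frozen importlib._bootstrap>”,第955行,在 _find_and_load_unlocked 中
文件“<frozen importlib._bootstrap>”,第665行,在 _load_unlocked 中
文件“<frozen importlib._bootstrap_external>”,第678行,在 exec_module 中
文件“<frozen importlib._bootstrap>”,第219行,在 _call_with_frames_removed 中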