Python multiprocessing.dummy, multiprocessing, and map: how do I do error handling?


I am using the multiprocessing.dummy module to do some concurrent processing. I am making HTTP requests, and the object may return no data at all. In that case I need to catch the AttributeError and move on.

I tried catching it inside the object itself, but I still got the error; the only thing that worked was putting a try/except around the pool.map call itself. I would like to know why that is, and whether a try/except around pool.map is the best way to do error handling for multiprocessing and the map functions.
Here is some of my code for reference:

all_commits = []
projects = [Project(value['id']) for value in project_data.values()]

def process_projects(project):
    if project.name in bad_names.keys():
        project.name = bad_names[project.name]
    project.return_results(rest, all_commits)

pool = ThreadPool(8)
pool.map(process_projects, projects)
pool.close()
pool.join()

print 'All data gathered.'
print 'Number of commits: {}'.format(len(all_commits))
fieldnames = get_fieldnames(
    'ods_gerrit.staging_gerrit_commits',
    settings.REDSHIFT_POSTGRES_INFO)
s3_file = 'staging_gerrit_commits_{}.csv.gz'.format(date.today())
with gzip.open(s3_file, 'wb') as outf:
    writer = DictWriter(
        outf,
        fieldnames=fieldnames,
        extrasaction='ignore',
        delimiter='|'
    )
    cnt = 0
    pool = ThreadPool(8)
    try:
        pool.map(process_commits, all_commits)
    except AttributeError:
        pass
    pool.close()
    pool.join()
Here is the code for my Commit object and the function that map calls:

class Commit(object):
    def __init__(self, rev_id, change_id, full_id):
        self.rev_id = rev_id
        self.change_id = change_id
        self.full_id = full_id

    def clean_data(self, _dict):
        for key, value in _dict.items():
            if isinstance(value, dict):
                self.clean_data(_dict[key])
            else:
                try:
                    _dict[key] = _dict[key].encode(
                        'utf_8',
                        'replace'
                    ).encode('string_escape').replace('|', '[pipe]')
                except AttributeError:
                    continue

    def get_data(self, ger_obj):
        print 'Getting data for a commit for {f_id}'.format(
            f_id=self.full_id
        )
        endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
            c_id=self.change_id,
            r_id=self.rev_id
        ))
        try:
            self.data = ger_obj.get(endpoint)
        except HTTPError:
            try:
                endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
                    f_id=self.full_id,
                    r_id=self.rev_id
                ))
                self.data = ger_obj.get(endpoint)
            except HTTPError:
                logging.warning('Neither endpoint returned data: {ep}'.format(
                    ep=endpoint
                ))
                raise HTTPError()
        except ReadTimeout:
            logging.warning('Read Timeout occurred for a commit. Endpoint: '
                            '{ep}'.format(ep=endpoint))
            return
        self.data['change_id'] = self.change_id
        self.data['proj_branch_id'] = self.full_id
        self.data['revision_id'] = self.rev_id
        self.data['commitid'] = self.data.get('commit')
        self.data['name'] = self.data.get('committer')['name']
        self.data['email'] = self.data.get('committer')['email']
        self.data['date'] = self.data.get('committer')['date']
        hash = md5()
        hash.update(json.dumps(self.data).encode('utf-8'))
        self.data['etl_checksum_md5'] = hash.hexdigest()
        self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
        self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
        self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
        self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
        self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
        self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
        self.clean_data(self.data)


    def write_data(self, writer):
        print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
        writer.writerow(self.data)

def process_commits(commit):
    # cnt is shared between the worker threads, so declare it global
    # before first use.
    global cnt
    print 'On commit #{}'.format(cnt)
    unique_id = commit.change_id + commit.rev_id
    if not id_search(unique_ids, unique_id):
        try:
            commit.get_data(rest)
        except HTTPError:
            pass
        try:
            commit.write_data(writer=writer)
        except UnicodeEncodeError:
            logging.warning(
                '{data} caused a Unicode Encode Error.'.format(
                    data=commit.data
                ))
        cnt += 1

If you are not sure where exactly the AttributeError is triggered, try wrapping the whole of process_commits in a try/except and using traceback.print_exc() in the except block.
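As a rough sketch of that suggestion (process_commits_debug is a hypothetical wrapper, not part of the original code):

import traceback

def process_commits_debug(commit):
    try:
        process_commits(commit)  # the worker function from the question
    except Exception:
        # Print the full traceback instead of letting the pool swallow
        # it, so the exact failing line is visible.
        traceback.print_exc()

pool = ThreadPool(8)
pool.map(process_commits_debug, all_commits)
pool.close()
pool.join()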
I guessed it was happening somewhere in there; it turns out it happens in the commit's get_data method. I tried handling it there, but I still got the exception. It seems the handling needs to happen at the thread level, since pool.map is pool.map_async().get() under the hood, if I remember correctly.
multiprocessing.pool catches exceptions raised in the workers and re-raises them in the main thread. It is strange that the AttributeError is not caught in the worker, where it should be. Try logging.error(traceback.format_exc()) in the except block.
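That re-raising behavior is easy to demonstrate with a minimal, self-contained example (not from the original question):

from multiprocessing.dummy import Pool as ThreadPool

def worker(item):
    if item == 3:
        # Raised in a worker thread...
        raise AttributeError('no data for item {}'.format(item))
    return item * 2

pool = ThreadPool(4)
try:
    results = pool.map(worker, range(6))
except AttributeError as exc:
    # ...but re-raised here in the caller by pool.map, which is why only
    # a try/except around pool.map appears to catch it.
    print('caught in the main thread: {}'.format(exc))
pool.close()
pool.join()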
Also, you are writing to a single DictWriter from 8 threads, and I doubt that is safe.
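If the shared writer does turn out to be the problem, one common fix is to serialize the writerow calls with a lock. A minimal sketch (writer_lock and write_data_locked are names introduced here, not part of the original code):

import threading

writer_lock = threading.Lock()

def write_data_locked(commit, writer):
    # Only one thread at a time may touch the shared DictWriter.
    with writer_lock:
        writer.writerow(commit.data)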