Python Multiprocessing.dummy、Multiprocessing和map，如何执行错误处理？_Python_Error Handling_Python Multithreading_Python Multiprocessing

Python Multiprocessing.dummy、Multiprocessing和map，如何执行错误处理？

python error-handling

Python Multiprocessing.dummy、Multiprocessing和map，如何执行错误处理？,python,error-handling,python-multithreading,python-multiprocessing,Python,Error Handling,Python Multithreading,Python Multiprocessing,我正在使用multiprocessing.dummy模块进行一些并发处理。我正在发出HTTP请求，对象可能不会返回任何数据。在这种情况下，我需要捕获属性错误，然后继续我尝试在对象本身中捕获它，但仍然收到错误，唯一有效的方法是在pool.map调用自身时执行try/except。我想知道这是为什么，这是否是对多处理和映射函数进行错误处理的最佳方法以下是我的一些代码供参考： all_commits = [] projects = [Project(value['id']) for val

我正在使用

multiprocessing.dummy

模块进行一些并发处理。我正在发出

HTTP

请求，对象可能不会返回任何数据。在这种情况下，我需要捕获

属性错误

，然后继续

我尝试在对象本身中捕获它，但仍然收到错误，唯一有效的方法是在

pool.map

调用自身时执行

try/except

。我想知道这是为什么，这是否是对

多处理

和

映射

函数进行错误处理的最佳方法

以下是我的一些代码供参考：

all_commits = []
    projects = [Project(value['id']) for value in project_data.values()]
    def process_projects(project):
        if project.name in bad_names.keys():
            project.name = bad_names[project.name]
        project.return_results(rest, all_commits)
    pool = ThreadPool(8)
    pool.map(process_projects, projects)
    pool.close()
    pool.join()

    print 'All data gathered.'
    print 'Number of commits: {}'.format(len(all_commits))
    fieldnames = get_fieldnames(
        'ods_gerrit.staging_gerrit_commits',
        settings.REDSHIFT_POSTGRES_INFO)
    s3_file = ('staging_gerrit_commits_{}.csv.gz'.format(
        date.today())
    )
    with gzip.open(s3_file, 'wb') as outf:
        writer = DictWriter(
            outf,
            fieldnames=fieldnames,
            extrasaction='ignore',
            delimiter='|'
        )
        cnt = 0
        pool = ThreadPool(8)
        try:
            pool.map(process_commits, all_commits)
        except AttributeError:
            pass
        pool.close()
        pool.join()

下面是我的

Commit

目标代码和

map

函数调用的函数：

class Commit(object):
    def __init__(self, rev_id, change_id, full_id):
        self.rev_id = rev_id
        self.change_id = change_id
        self.full_id = full_id

    def clean_data(self, _dict):
        for key, value in _dict.items():
            if isinstance(value, dict):
                self.clean_data(_dict[key])
            else:
                try:
                    _dict[key] = _dict[key].encode(
                        'utf_8',
                        'replace'
                    ).encode('string_escape').replace('|', '[pipe]')
                except AttributeError:
                    continue

    def get_data(self, ger_obj):
        print 'Getting data for a commit for {f_id}'.format(
            f_id=self.full_id
        )
        endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
            c_id=self.change_id,
            r_id=self.rev_id
        ))
        try:
            self.data = ger_obj.get(endpoint)
        except HTTPError:
            try:
                endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
                    f_id=self.full_id,
                    r_id=self.rev_id
                ))
                self.data = ger_obj.get(endpoint)
            except HTTPError:
                logging.warning('Neither endpoint returned data: {ep}'.format(
                    ep=endpoint
                ))
                raise HTTPError()
        except ReadTimeout:
            logging.warning('Read Timeout occurred for a commit.  Endpoint: '
                            '{ep/}'.format(ep=endpoint))
            return
        self.data['change_id'] = self.change_id
        self.data['proj_branch_id'] = self.full_id
        self.data['revision_id'] = self.rev_id
        self.data['commitid'] = self.data.get('commit')
        self.data['name'] = self.data.get('committer')['name']
        self.data['email'] = self.data.get('committer')['email']
        self.data['date'] = self.data.get('committer')['date']
        hash = md5()
        hash.update(json.dumps(self.data).encode('utf-8'))
        self.data['etl_checksum_md5'] = hash.hexdigest()
        self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
        self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
        self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
        self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
        self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
        self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
        self.clean_data(self.data)


    def write_data(self, writer):
        print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
        writer.writerow(self.data)

def process_commits(commit):
    print 'On commit #{}'.format(cnt)
    unique_id = commit.change_id + commit.rev_id
    if not id_search(unique_ids, unique_id):
        try:
            commit.get_data(rest)
        except HTTPError:
            pass
        try:
            commit.write_data(writer=writer)
        except UnicodeEncodeError:
            logging.warning(
                '{data} caused a Unicode Encode Error.'.format(
                    data=commit.data
                ))
            pass
        global cnt
        cnt += 1

和控制器功能：

class Commit(object):
    def __init__(self, rev_id, change_id, full_id):
        self.rev_id = rev_id
        self.change_id = change_id
        self.full_id = full_id

    def clean_data(self, _dict):
        for key, value in _dict.items():
            if isinstance(value, dict):
                self.clean_data(_dict[key])
            else:
                try:
                    _dict[key] = _dict[key].encode(
                        'utf_8',
                        'replace'
                    ).encode('string_escape').replace('|', '[pipe]')
                except AttributeError:
                    continue

    def get_data(self, ger_obj):
        print 'Getting data for a commit for {f_id}'.format(
            f_id=self.full_id
        )
        endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
            c_id=self.change_id,
            r_id=self.rev_id
        ))
        try:
            self.data = ger_obj.get(endpoint)
        except HTTPError:
            try:
                endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
                    f_id=self.full_id,
                    r_id=self.rev_id
                ))
                self.data = ger_obj.get(endpoint)
            except HTTPError:
                logging.warning('Neither endpoint returned data: {ep}'.format(
                    ep=endpoint
                ))
                raise HTTPError()
        except ReadTimeout:
            logging.warning('Read Timeout occurred for a commit.  Endpoint: '
                            '{ep/}'.format(ep=endpoint))
            return
        self.data['change_id'] = self.change_id
        self.data['proj_branch_id'] = self.full_id
        self.data['revision_id'] = self.rev_id
        self.data['commitid'] = self.data.get('commit')
        self.data['name'] = self.data.get('committer')['name']
        self.data['email'] = self.data.get('committer')['email']
        self.data['date'] = self.data.get('committer')['date']
        hash = md5()
        hash.update(json.dumps(self.data).encode('utf-8'))
        self.data['etl_checksum_md5'] = hash.hexdigest()
        self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
        self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
        self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
        self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
        self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
        self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
        self.clean_data(self.data)


    def write_data(self, writer):
        print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
        writer.writerow(self.data)

def process_commits(commit):
    print 'On commit #{}'.format(cnt)
    unique_id = commit.change_id + commit.rev_id
    if not id_search(unique_ids, unique_id):
        try:
            commit.get_data(rest)
        except HTTPError:
            pass
        try:
            commit.write_data(writer=writer)
        except UnicodeEncodeError:
            logging.warning(
                '{data} caused a Unicode Encode Error.'.format(
                    data=commit.data
                ))
            pass
        global cnt
        cnt += 1

如果您不确定AttributeError的确切触发位置，请尝试在try..except中包装整个

进程提交

，并在except部分中使用

回溯.print_exc（）

。我猜它是在某个地方。它发生在commit

get_data

方法中。我试着处理这个问题，但还是遇到了异常。似乎它需要在线程级别，因为它是

pool.map\u asynch.get（）

，如果我没记错的话。

multiprocessing.pool

捕获工作线程中的异常，并在主线程中重新释放它们。奇怪的是，

AttributeError

没有在worker中被捕获，它应该被捕获。请尝试在例外部分中记录.error（traceback.format_exc（））。另外，您正在使用8个线程的

DictWriter

，我怀疑这是否安全。