Python Multiprocessing.dummy、Multiprocessing和map,如何执行错误处理?
我正在使用Python Multiprocessing.dummy、Multiprocessing和map,如何执行错误处理?,python,error-handling,python-multithreading,python-multiprocessing,Python,Error Handling,Python Multithreading,Python Multiprocessing,我正在使用multiprocessing.dummy模块进行一些并发处理。我正在发出HTTP请求,对象可能不会返回任何数据。在这种情况下,我需要捕获属性错误,然后继续 我尝试在对象本身中捕获它,但仍然收到错误,唯一有效的方法是在pool.map调用自身时执行try/except。我想知道这是为什么,这是否是对多处理和映射函数进行错误处理的最佳方法 以下是我的一些代码供参考: all_commits = [] projects = [Project(value['id']) for val
multiprocessing.dummy
模块进行一些并发处理。我正在发出HTTP
请求,对象可能不会返回任何数据。在这种情况下,我需要捕获属性错误
,然后继续
我尝试在对象本身中捕获它,但仍然收到错误,唯一有效的方法是在pool.map
调用自身时执行try/except
。我想知道这是为什么,这是否是对多处理
和映射
函数进行错误处理的最佳方法
以下是我的一些代码供参考:
all_commits = []
projects = [Project(value['id']) for value in project_data.values()]
def process_projects(project):
if project.name in bad_names.keys():
project.name = bad_names[project.name]
project.return_results(rest, all_commits)
pool = ThreadPool(8)
pool.map(process_projects, projects)
pool.close()
pool.join()
print 'All data gathered.'
print 'Number of commits: {}'.format(len(all_commits))
fieldnames = get_fieldnames(
'ods_gerrit.staging_gerrit_commits',
settings.REDSHIFT_POSTGRES_INFO)
s3_file = ('staging_gerrit_commits_{}.csv.gz'.format(
date.today())
)
with gzip.open(s3_file, 'wb') as outf:
writer = DictWriter(
outf,
fieldnames=fieldnames,
extrasaction='ignore',
delimiter='|'
)
cnt = 0
pool = ThreadPool(8)
try:
pool.map(process_commits, all_commits)
except AttributeError:
pass
pool.close()
pool.join()
下面是我的Commit
目标代码和map
函数调用的函数:
class Commit(object):
def __init__(self, rev_id, change_id, full_id):
self.rev_id = rev_id
self.change_id = change_id
self.full_id = full_id
def clean_data(self, _dict):
for key, value in _dict.items():
if isinstance(value, dict):
self.clean_data(_dict[key])
else:
try:
_dict[key] = _dict[key].encode(
'utf_8',
'replace'
).encode('string_escape').replace('|', '[pipe]')
except AttributeError:
continue
def get_data(self, ger_obj):
print 'Getting data for a commit for {f_id}'.format(
f_id=self.full_id
)
endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
c_id=self.change_id,
r_id=self.rev_id
))
try:
self.data = ger_obj.get(endpoint)
except HTTPError:
try:
endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
f_id=self.full_id,
r_id=self.rev_id
))
self.data = ger_obj.get(endpoint)
except HTTPError:
logging.warning('Neither endpoint returned data: {ep}'.format(
ep=endpoint
))
raise HTTPError()
except ReadTimeout:
logging.warning('Read Timeout occurred for a commit. Endpoint: '
'{ep/}'.format(ep=endpoint))
return
self.data['change_id'] = self.change_id
self.data['proj_branch_id'] = self.full_id
self.data['revision_id'] = self.rev_id
self.data['commitid'] = self.data.get('commit')
self.data['name'] = self.data.get('committer')['name']
self.data['email'] = self.data.get('committer')['email']
self.data['date'] = self.data.get('committer')['date']
hash = md5()
hash.update(json.dumps(self.data).encode('utf-8'))
self.data['etl_checksum_md5'] = hash.hexdigest()
self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
self.clean_data(self.data)
def write_data(self, writer):
print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
writer.writerow(self.data)
def process_commits(commit):
print 'On commit #{}'.format(cnt)
unique_id = commit.change_id + commit.rev_id
if not id_search(unique_ids, unique_id):
try:
commit.get_data(rest)
except HTTPError:
pass
try:
commit.write_data(writer=writer)
except UnicodeEncodeError:
logging.warning(
'{data} caused a Unicode Encode Error.'.format(
data=commit.data
))
pass
global cnt
cnt += 1
和控制器功能:
class Commit(object):
def __init__(self, rev_id, change_id, full_id):
self.rev_id = rev_id
self.change_id = change_id
self.full_id = full_id
def clean_data(self, _dict):
for key, value in _dict.items():
if isinstance(value, dict):
self.clean_data(_dict[key])
else:
try:
_dict[key] = _dict[key].encode(
'utf_8',
'replace'
).encode('string_escape').replace('|', '[pipe]')
except AttributeError:
continue
def get_data(self, ger_obj):
print 'Getting data for a commit for {f_id}'.format(
f_id=self.full_id
)
endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
c_id=self.change_id,
r_id=self.rev_id
))
try:
self.data = ger_obj.get(endpoint)
except HTTPError:
try:
endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
f_id=self.full_id,
r_id=self.rev_id
))
self.data = ger_obj.get(endpoint)
except HTTPError:
logging.warning('Neither endpoint returned data: {ep}'.format(
ep=endpoint
))
raise HTTPError()
except ReadTimeout:
logging.warning('Read Timeout occurred for a commit. Endpoint: '
'{ep/}'.format(ep=endpoint))
return
self.data['change_id'] = self.change_id
self.data['proj_branch_id'] = self.full_id
self.data['revision_id'] = self.rev_id
self.data['commitid'] = self.data.get('commit')
self.data['name'] = self.data.get('committer')['name']
self.data['email'] = self.data.get('committer')['email']
self.data['date'] = self.data.get('committer')['date']
hash = md5()
hash.update(json.dumps(self.data).encode('utf-8'))
self.data['etl_checksum_md5'] = hash.hexdigest()
self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
self.clean_data(self.data)
def write_data(self, writer):
print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
writer.writerow(self.data)
def process_commits(commit):
print 'On commit #{}'.format(cnt)
unique_id = commit.change_id + commit.rev_id
if not id_search(unique_ids, unique_id):
try:
commit.get_data(rest)
except HTTPError:
pass
try:
commit.write_data(writer=writer)
except UnicodeEncodeError:
logging.warning(
'{data} caused a Unicode Encode Error.'.format(
data=commit.data
))
pass
global cnt
cnt += 1
如果您不确定AttributeError的确切触发位置,请尝试在try..except中包装整个
进程提交
,并在except部分中使用回溯.print_exc()
。我猜它是在某个地方。它发生在commitget_data
方法中。我试着处理这个问题,但还是遇到了异常。似乎它需要在线程级别,因为它是pool.map\u asynch.get()
,如果我没记错的话。multiprocessing.pool
捕获工作线程中的异常,并在主线程中重新释放它们。奇怪的是,AttributeError
没有在worker中被捕获,它应该被捕获。请尝试在例外部分中记录.error(traceback.format_exc())。另外,您正在使用8个线程的DictWriter
,我怀疑这是否安全。