Python 如何将multiprocessing pool.map与多个参数一起使用？_Python_Multiprocessing

Python 如何将multiprocessing pool.map与多个参数一起使用？

python

Python 如何将multiprocessing pool.map与多个参数一起使用？,python,multiprocessing,Python,Multiprocessing,在Python多处理库中，是否有支持多个参数的pool.map变体 text = "test" def harvester(text, case): X = case[0] text+ str(X) if __name__ == '__main__': pool = multiprocessing.Pool(processes=6) case = RAW_DATASET pool.map(harvester(text,case),ca

在Python

多处理

库中，是否有支持多个参数的

pool.map

变体

text = "test"
def harvester(text, case):
    """Concatenate ``text`` with the string form of the first item of ``case``.

    NOTE(review): the concatenation below is a bare expression statement, so
    its value is discarded and the function returns ``None``.
    """
    X = case[0]
    text+ str(X)

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=6)
    case = RAW_DATASET
    # This is the call the question is about: ``harvester(text, case)`` is
    # evaluated right here in the parent process, and its return value (not
    # the function itself) is what ``pool.map`` receives as the callable.
    pool.map(harvester(text,case),case, 1)
    pool.close()
    pool.join()

答案取决于 Python 版本和具体情况。对于较新的 Python 版本（3.3 及以上），最通用的答案最早由下面的另一个回答给出。[1] 它使用 Pool.starmap 方法，该方法接受一个由参数元组组成的序列，然后自动解包每个元组中的参数，并将其传递给给定的函数：

import multiprocessing
from itertools import product

def merge_names(a, b):
    """Return ``a`` and ``b`` joined as ``'<a> & <b>'``."""
    return f'{a} & {b}'


if __name__ == '__main__':
    names = ['Brown', 'Wilson', 'Bartlett', 'Rivera', 'Molloy', 'Opie']
    # Every ordered pair of names, including each name paired with itself.
    name_pairs = product(names, repeat=2)
    with multiprocessing.Pool(processes=3) as pool:
        # starmap unpacks each (a, b) tuple into merge_names(a, b).
        results = pool.starmap(merge_names, name_pairs)
    print(results)

# Output: ['Brown & Brown', 'Brown & Wilson', 'Brown & Bartlett', ...

import itertools
from multiprocessing import Pool

def universal_worker(input_pair):
    """Call the function packed in ``input_pair`` with its packed arguments.

    ``input_pair`` is a two-item ``(function, args)`` pair; ``args`` is
    unpacked positionally into the call and the call's result is returned.
    """
    target, call_args = input_pair
    return target(*call_args)

def pool_args(function, *args):
    """Pair ``function`` with each tuple obtained by zipping ``args``.

    The result yields ``(function, (a0, a1, ...))`` items, one per position
    of the zipped argument iterables.
    """
    argument_tuples = zip(*args)
    return zip(itertools.repeat(function), argument_tuples)

对于 Python 的早期版本，需要编写一个辅助函数来显式解包参数。如果要使用 with 语句，还需要编写一个包装器，将 Pool 转换为上下文管理器。（感谢指出这一点的读者。）

在更简单的情况下，如果第二个参数是固定的，也可以使用 functools.partial，但仅限于 Python 2.7+

import multiprocessing
from functools import partial
from contextlib import contextmanager

@contextmanager
def poolcontext(*args, **kwargs):
    """Context manager that yields a ``multiprocessing.Pool``.

    All positional and keyword arguments are forwarded to
    ``multiprocessing.Pool``. The pool is terminated when the ``with`` block
    exits, including when the block raises.
    """
    pool = multiprocessing.Pool(*args, **kwargs)
    try:
        yield pool
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip terminate() and leave the worker processes running.
        pool.terminate()

def merge_names(a, b):
    """Return the two names joined by ``' & '``."""
    return '{0} & {1}'.format(a, b)


if __name__ == '__main__':
    names = ['Brown', 'Wilson', 'Bartlett', 'Rivera', 'Molloy', 'Opie']
    # Fix the second argument so pool.map only has to supply the first one.
    merge_with_sons = partial(merge_names, b='Sons')
    with poolcontext(processes=3) as pool:
        results = pool.map(merge_with_sons, names)
    print(results)

# Output: ['Brown & Sons', 'Wilson & Sons', 'Bartlett & Sons', ...

[1] 这个回答在很大程度上受到了另一个回答的启发，那个回答本应被采纳。但由于本回答一直排在最前面，似乎最好对其加以改进，以供未来的读者参考。

pool.map是否有支持多个参数的变体

text = "test"
def harvester(text, case):
    """Concatenate ``text`` with the string form of ``case[0]``.

    NOTE(review): the result of the concatenation is never stored or
    returned, so callers always get ``None``.
    """
    X = case[0]
    text+ str(X)

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=6)
    case = RAW_DATASET
    # The problematic call: ``harvester(text, case)`` runs immediately in the
    # parent process and its return value is passed to ``pool.map`` where a
    # callable is expected.
    pool.map(harvester(text,case),case, 1)
    pool.close()
    pool.join()

Python 3.3 包含 pool.starmap() 方法：

对于旧版本：

#!/usr/bin/env python2
import itertools
from multiprocessing import Pool, freeze_support

def func(a, b):
    # Python 2 print statement: writes both arguments on one line.
    print a, b

def func_star(a_b):
    """Convert `f([1,2])` to `f(1,2)` call."""
    return func(*a_b)

def main():
    pool = Pool()
    a_args = [1,2,3]
    second_arg = 1
    # izip pairs every item of a_args with the endlessly repeated second_arg,
    # yielding (1, 1), (2, 1), (3, 1); func_star unpacks each pair for func.
    pool.map(func_star, itertools.izip(a_args, itertools.repeat(second_arg)))

if __name__=="__main__":
    freeze_support()
    main()

输出为 “1 1”、“2 1”、“3 1”（各占一行）。注意这里是如何使用 itertools.izip() 和 itertools.repeat() 的。

由于在 Python 2.6 上无法使用 functools.partial() 或类似的功能，因此应显式定义简单的包装函数 func_star()。另请参见其他回答中给出的替代方案。

我认为下面的内容会更好

def multi_run_wrapper(args):
    """Unpack the ``args`` tuple and forward it to ``add``.

    ``pool.map`` passes exactly one argument per task, so each task's
    arguments travel as a single tuple that is expanded here.
    """
    return add(*args)


def add(x, y):
    """Return ``x + y``."""
    return x + y


if __name__ == "__main__":
    from multiprocessing import Pool
    pool = Pool(4)
    results = pool.map(multi_run_wrapper, [(1, 2), (2, 3), (3, 4)])
    # Release the worker processes once the results are in.
    pool.close()
    pool.join()
    # The call form with a single argument prints the same on Python 2 and 3.
    print(results)

输出

[3, 5, 7]

有一个名为 pathos 的 multiprocessing 分支（注意：请使用 GitHub 上的版本），它不需要 starmap——其 map 函数与 Python 内置 map 的 API 一致，因此 map 可以接受多个参数。使用 pathos，通常还可以直接在解释器中进行多进程处理，而不必局限于 `__main__` 块中。pathos 在经过一些小幅更新（主要是迁移到 Python 3.x）之后即将发布。
  Python 2.7.5 (default, Sep 30 2013, 20:15:49) 
  [GCC 4.2.1 (Apple Inc. build 5566)] on darwin
  Type "help", "copyright", "credits" or "license" for more information.
  >>> def func(a,b):
  ...     print a,b
  ...
  >>>
  >>> from pathos.multiprocessing import ProcessingPool    
  >>> pool = ProcessingPool(nodes=4)
  >>> pool.map(func, [1,2,3], [1,1,1])
  1 1
  2 1
  3 1
  [None, None, None]
  >>>
  >>> # also can pickle stuff like lambdas 
  >>> result = pool.map(lambda x: x**2, range(10))
  >>> result
  [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
  >>>
  >>> # also does asynchronous map
  >>> result = pool.amap(pow, [1,2,3], [4,5,6])
  >>> result.get()
  [1, 32, 729]
  >>>
  >>> # or can return a map iterator
  >>> result = pool.imap(pow, [1,2,3], [4,5,6])
  >>> result
  <processing.pool.IMapIterator object at 0x110c2ffd0>
  >>> list(result)
  [1, 32, 729]

在另一个回答中了解了 itertools 之后，我决定更进一步，编写一个负责并行化的 parmap 包，它在 python-2.7 和 python-3.2（及更高版本）上提供 map 和 starmap 函数，这些函数可以接受任意数量的位置参数。
安装：
pip install parmap

如何并行化：
import parmap
# If you want to do:
y = [myfunction(x, argument1, argument2) for x in mylist]
# In parallel:
y = parmap.map(myfunction, mylist, argument1, argument2)

# If you want to do:
z = [myfunction(x, y, argument1, argument2) for (x,y) in mylist]
# In parallel:
z = parmap.starmap(myfunction, mylist, argument1, argument2)

# If you want to do:
listx = [1, 2, 3, 4, 5, 6]
listy = [2, 3, 4, 5, 6, 7]
# Named param1 so that it matches the name used in the calls below.
param1 = 3.14
param2 = 42
listz = []
for (x, y) in zip(listx, listy):
    listz.append(myfunction(x, y, param1, param2))
# In parallel:
listz = parmap.starmap(myfunction, zip(listx, listy), param1, param2)

我已将 parmap 上传到 PyPI 和一个 GitHub 仓库。
例如，该问题可以这样回答：
import parmap

def harvester(case, text):
    """Return ``text`` followed by the string form of ``case[0]``."""
    X = case[0]
    # Return the value so the mapping call can collect it; a bare
    # ``text + str(X)`` expression would be discarded and yield None.
    return text + str(X)


if __name__ == "__main__":
    case = RAW_DATASET  # assuming this is an iterable
    parmap.map(harvester, case, "test", chunksize=1)

pool = Pool(n_core)
# pool_args builds one (function, args) pair per task; universal_worker
# unpacks each pair and performs the call in a worker process.
list_model = pool.map(universal_worker, pool_args(function, arg_0, arg_1, arg_2))
pool.close()
pool.join()

另一种方法是将列表列表传递给单参数例程：
import os
from multiprocessing import Pool

def task(args):
    """Print the worker's PID together with the two packed arguments.

    ``args`` is an indexable pair; ``pool.map`` delivers one such pair per
    task.
    """
    # One pre-formatted string prints identically on Python 2 and Python 3.
    print("PID = {0} , arg1 = {1} , arg2 = {2}".format(
        os.getpid(), args[0], args[1]))


if __name__ == "__main__":
    # Create the pool only when run as a script, so that worker processes
    # importing this module (spawn start method) do not create pools too.
    pool = Pool()

    pool.map(task, [
            [1,2],
            [3,4],
            [5,6],
            [7,8]
        ])
    pool.close()
    pool.join()

您可以使用自己喜欢的方法构建参数列表。
您可以使用以下两个函数，以避免为每个新函数编写包装：
import multiprocessing
from itertools import product

def merge_names(a, b):
    """Format the two names as ``'<a> & <b>'`` and return the string."""
    return '{first} & {second}'.format(first=a, second=b)


if __name__ == '__main__':
    names = ['Brown', 'Wilson', 'Bartlett', 'Rivera', 'Molloy', 'Opie']
    with multiprocessing.Pool(processes=3) as pool:
        # Each element of the product is an (a, b) tuple that starmap
        # spreads over merge_names' two parameters.
        all_pairs = product(names, repeat=2)
        results = pool.starmap(merge_names, all_pairs)
    print(results)

# Output: ['Brown & Brown', 'Brown & Wilson', 'Brown & Bartlett', ...

import itertools
from multiprocessing import Pool

def universal_worker(input_pair):
    """Run the callable stored in ``input_pair`` and return its result.

    ``input_pair`` unpacks into exactly two items: the callable and the
    iterable of positional arguments to call it with.
    """
    callee, positional = input_pair
    return callee(*positional)

def pool_args(function, *args):
    """Build ``(function, argument_tuple)`` pairs from parallel iterables.

    The iterables in ``args`` are zipped together, and ``function`` is
    repeated alongside every resulting argument tuple.
    """
    repeated_function = itertools.repeat(function)
    return zip(repeated_function, zip(*args))

将函数function
与参数列表arg_0
、arg_1
和arg_2
一起使用，如下所示：
import parmap

def harvester(case, text):
    """Return ``text`` with the string form of ``case[0]`` appended."""
    X = case[0]
    # The concatenation must be returned; as a bare expression statement it
    # would be thrown away and each mapped task would produce None.
    return text + str(X)


if __name__ == "__main__":
    case = RAW_DATASET  # assuming this is an iterable
    parmap.map(harvester, case, "test", chunksize=1)

pool = Pool(n_core)
# Each task is a (function, args) pair from pool_args, executed in a worker
# by universal_worker.
list_model = pool.map(universal_worker, pool_args(function, arg_0, arg_1, arg_2))
pool.close()
pool.join()

使用Python3.3+和pool.starmap（）：

结果:
1 --- 4
2 --- 5
3 --- 6

如果需要，还可以用 zip() 组合更多参数：zip(a, b, c, d, e)

如果要将常量值作为参数传递，请执行以下操作：
import itertools

zip(itertools.repeat(constant), a)

如果你的函数应该返回一些东西：
results = pool.starmap(write, zip(a,b))

这将提供一个包含返回值的列表。
在 Python 3.4.4 中，可以使用 multiprocessing.get_context() 获取上下文对象，以便使用多种启动方法：
import multiprocessing as mp

def foo(q, h, w):
    """Put ``h + ' ' + w`` on the queue ``q`` and print the same text."""
    greeting = h + ' ' + w
    q.put(greeting)
    print(greeting)


if __name__ == '__main__':
    # Select the 'spawn' start method through a context object.
    ctx = mp.get_context('spawn')
    q = ctx.Queue()
    worker = ctx.Process(target=foo, args=(q, 'hello', 'world'))
    worker.start()
    print(q.get())
    worker.join()

或者你只是简单地替换
pool.map(harvester(text,case),case, 1)

替换为：
一个更好的方法是使用装饰器，而不是手工编写包装函数。特别是当有很多函数需要映射时，装饰器可以避免为每个函数编写包装器，从而节省时间。通常，被装饰的函数无法被序列化（pickle），但我们可以借助 functools 绕过这一限制。更多讨论可以在别处找到。
下面是一个例子：
def unpack_args(func):
    """Decorate ``func`` so it takes all of its arguments as one object.

    The returned wrapper accepts a single ``args`` value: a dict is expanded
    as keyword arguments, anything else is expanded positionally.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(args):
        return func(**args) if isinstance(args, dict) else func(*args)

    return wrapper


@unpack_args
def func(x, y):
    # Once decorated, this is called with one packed (x, y) argument.
    return x + y

然后您可以使用压缩参数映射它
# Two worker processes; the inputs are two parallel ranges of ten values.
np = 2
xlist = range(10)
ylist = range(10)
pool = Pool(np)
# zip pairs the inputs so that every task receives one (x, y) tuple.
res = pool.map(func, zip(xlist, ylist))
pool.close()
pool.join()

def mainImage(package_iter) -> vec3: 
    fragCoord=package_iter[0]  
    iResolution=package_iter[1]
    iTime=package_iter[2]

当然，正如其他答案中提到的，在 Python 3（>=3.3）中始终可以使用 Pool.starmap。
另一个简单的替代方法是将函数参数包装在元组中，然后将应该在元组中传递的参数包装起来。在处理大型数据块时，这可能并不理想。我相信它会为每个元组制作副本
from multiprocessing import Pool

def f(args):
    """Print the four packed values and return their sum.

    ``args`` is a 4-item sequence. It is unpacked in the body because tuple
    parameters in a function signature were removed in Python 3 (PEP 3113).
    """
    a, b, c, d = args
    print(a, b, c, d)
    return a + b + c + d


if __name__ == '__main__':
    p = Pool(10)
    data = [(i+0, i+1, i+2, i+3) for i in range(10)]
    print(p.map(f, data))
    p.close()
    p.join()

以某种随机顺序给出输出：
0 1 2 3
1 2 3 4
2 3 4 5
3 4 5 6
4 5 6 7
5 6 7 8
7 8 9 10
6 7 8 9
8 9 10 11
9 10 11 12
[6, 10, 14, 18, 22, 26, 30, 34, 38, 42]

官方文档指出，它只支持一个可迭代参数。在这种情况下我喜欢使用 apply_async。就你的情况而言，我会这样做：
from multiprocessing import Process, Pool, Manager

text = "test"
def harvester(text, case, q = None):
    """Return ``text`` followed by the string form of ``case[0]``.

    When a queue-like ``q`` is supplied, the result is also put on it.
    """
    X = case[0]
    res = text + str(X)
    # Compare against None explicitly: a supplied queue object must receive
    # the result even if it happens to evaluate as falsy.
    if q is not None:
        q.put(res)
    return res


def block_until(q, results_queue, until_counter=0):
    """Move ``until_counter`` items from ``q`` to ``results_queue``.

    Each item is fetched with an argument-less ``q.get()`` and put on
    ``results_queue``; the function returns once the count is reached.
    """
    remaining = until_counter
    while remaining > 0:
        results_queue.put(q.get())
        remaining -= 1

if __name__ == '__main__':
    from multiprocessing import TimeoutError as PoolTimeoutError

    # ``Pool`` is the name this snippet imports; ``multiprocessing`` itself
    # is not imported here.
    pool = Pool(processes=6)
    case = RAW_DATASET
    m = Manager()
    q = m.Queue()
    results_queue = m.Queue() # when it completes results will reside in this queue
    # Process's first positional parameter is ``group``, so the target and
    # its arguments have to be passed by keyword.
    blocking_process = Process(target=block_until,
                               args=(q, results_queue, len(case)))
    blocking_process.start()
    for c in case:
        # One task per item of ``case``; the shared queue is handed over so
        # block_until receives exactly len(case) results.
        res = pool.apply_async(harvester, (text, c, q))
        try:
            res.get(timeout=0.1)
        except PoolTimeoutError:
            # Best effort: a task slower than the timeout still reports its
            # result through ``q``.
            pass
    pool.close()
    pool.join()
    blocking_process.join()

<代码>来自多处理导入流程、池、管理器
text=“测试”
def收割机（文本、案例、q=无）：
X=案例[0]
res=文本+str（X）
如果q：
q、 put（res）
返回res
def阻塞直到（q，结果队列，直到计数器=0）：
i=0
而i

python2的更好解决方案：

from multiprocessing import Pool def func((i, (a, b))): print i, a, b return a + b pool = Pool(3) pool.map(func, [(0,(1,2)), (1,(2,3)), (2,(3, 4))])
2 3 4
1 2 3
0 1 2
Out[]:

[3,5,7]
如何接受多个参数：

def f1(args):
    """Return the sum of the first three items of ``args``."""
    a, b, c = args[0] , args[1] , args[2]
    return a+b+c

if __name__ == "__main__":
    import multiprocessing
    pool = multiprocessing.Pool(4)

    # Every task is one list holding all of that task's arguments.
    result1 = pool.map(f1, [ [1,2,3] ])
    print(result1)

对于python2，您可以使用以下技巧

import functools
import multiprocessing


def fun(a, b):
    """Return ``a + b``."""
    return a + b


if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=6)
    b = 233
    # Pool.map pickles the callable to send it to the workers. A lambda
    # cannot be pickled, whereas a partial of a module-level function can.
    pool.map(functools.partial(fun, b=b), range(1000))
    pool.close()
    pool.join()

这是我用来将多个参数传递给fork中使用的单参数函数的例程的一个示例：

这里有很多答案，但似乎没有一个能提供适用于任何版本的Python 2/3兼容代码。如果您希望代码正常工作，这将适用于以下任一Python版本：

# ---- Snippet: pool context manager for Python 2/3 --------------------------
# For Python 2/3 compatibility, define a pool context manager
# to support the "with" statement in Python 2
if sys.version_info[0] == 2:
    from contextlib import contextmanager

    @contextmanager
    def multiprocessing_context(*args, **kwargs):
        pool = multiprocessing.Pool(*args, **kwargs)
        yield pool
        # NOTE(review): the rest of this snippet is cut off in the page.

# ---- Snippet: generic unpack() helper ---------------------------------------
text = "test"

def unpack(args):
    # args[0] is the function to call, the remaining items are its arguments.
    return args[0](*args[1:])

def harvester(text, case):
    X = case[0]
    text+ str(X)

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=6)
    case = RAW_DATASET
    # args is a list of tuples
    # with the function to execute as the first item in each tuple
    args = [(harvester, text, c) for c in case]
    # doing it this way, we can pass any function
    # and we don't need to define a wrapper for each different function
    # if we need to use more than one
    pool.map(unpack, args)
    pool.close()
    pool.join()

# ---- Snippet: bound method as the mapped function ---------------------------
from multiprocessing import Pool

# Wrapper of the function to map:
class makefun:
    def __init__(self, var2):
        self.var2 = var2
    def fun(self, i):
        var2 = self.var2
        return var1[i] + var2

# Couple of variables for the example:
var1 = [1, 2, 3, 5, 6, 7, 8]
var2 = [9, 10, 11, 12]

# Open the pool:
pool = Pool(processes=2)

# Wrapper loop
for j in range(len(var2)):
    # Obtain the function to map
    pool_fun = makefun(var2[j]).fun

    # Fork loop
    for i, value in enumerate(pool.imap(pool_fun, range(len(var1))), 0):
        print(var1[i], '+' ,var2[j], '=', value)

# Close the pool
pool.close()

# ---- Snippet: using multiprocessing_context ---------------------------------
def _function_to_run_for_each(x):
    return x.lower()

with multiprocessing_context(processes=3) as pool:
    results = pool.map(_function_to_run_for_each, ['Bob', 'Sue', 'Tim'])
    print(results)

# ---- Snippet: starmap with a list of parameter tuples -----------------------
import multiprocessing

def main():
    with multiprocessing.Pool(10) as pool:
        params = [ (2, 2), (3, 3), (4, 4) ]
        pool.starmap(printSum, params)
    # end with
# end function

def printSum(num1, num2):
    mySum = num1 + num2
    print('num1 = ' + str(num1) + ', num2 = ' + str(num2) + ', sum = ' + str(mySum))
# end function

if __name__ == '__main__':
    main()

# Output:
# num1 = 2, num2 = 2, sum = 4
# num1 = 3, num2 = 3, sum = 6
# num1 = 4, num2 = 4, sum = 8

# ---- Snippet: thread pool with a closure over shared arguments --------------
# np.eye(3) = [ [1. 0. 0.]
#               [0. 1. 0.]
#               [0. 0. 1.]]
import numpy as np
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing import cpu_count


def extract_counts(label_array):
    labels = np.unique(label_array)
    out = extract_counts_helper([label_array], labels)
    return out


def extract_counts_helper(args, labels):
    n = max(1, cpu_count() - 1)
    pool = ThreadPool(n)
    results = {}
    # wrapper() returns a one-argument function that writes into `results`.
    pool.map(wrapper(args, results), labels)
    pool.close()
    pool.join()
    return results


def wrapper(argsin, results):
    def inner_fun(label):
        label_array = argsin[0]
        counts = get_label_counts(label_array, label)
        results[label] = counts
    return inner_fun


def get_label_counts(label_array, label):
    return sum(label_array.flatten() == label)


if __name__ == "__main__":
    img = np.ones([2,2])
    out = extract_counts(img)
    print('input array: \n', img)
    print('label counts: ', out)
    print("========")

    img = np.eye(3)
    out = extract_counts(img)
    print('input array: \n', img)
    print('label counts: ', out)
    print("========")

    img = np.random.randint(5, size=(3, 3))
    out = extract_counts(img)
    print('input array: \n', img)
    print('label counts: ', out)
    print("========")

# Output:
# input array:
#  [[1. 1.]
#  [1. 1.]]
# label counts:  {1.0: 4}
# ========
# input array:
#  [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]
# label counts:  {0.0: 6, 1.0: 3}
# ========
# input array:
#  [[4 4 0]
#  [2 4 3]
#  [2 3 1]]
# label counts:  {0: 1, 1: 1, 2: 2, 3: 2, 4: 3}
# ========

# ---- Snippet: packing several arguments for executor.map --------------------
# Signature before packing the arguments:
# def mainImage(fragCoord : vec2, iResolution : vec3, iTime : float) -> vec3:

# Signature after packing; the page shows only the unpacking lines of the body.
def mainImage(package_iter) -> vec3:
    fragCoord=package_iter[0]
    iResolution=package_iter[1]
    iTime=package_iter[2]

package_iter = []
iResolution = vec3(nx,ny,1)

for j in range( (ny-1), -1, -1):
    for i in range( 0, nx, 1):
        fragCoord : vec2 = vec2(i,j)
        time_elapsed_seconds = 10
        package_iter.append( (fragCoord, iResolution, time_elapsed_seconds) )

array_rgb_values = []

with concurrent.futures.ProcessPoolExecutor() as executor:
    for val in executor.map(mainImage, package_iter):
        fragColor=val
        ir = clip( int(255* fragColor.r), 0, 255)
        ig = clip(int(255* fragColor.g), 0, 255)
        ib= clip(int(255* fragColor.b), 0, 255)

        array_rgb_values.append( (ir,ig,ib) )