Python 并行和条件：'NoneType' 对象没有属性 '__dict__'_Python_Python Multiprocessing

Python 并行和条件：'NoneType' 对象没有属性 '__dict__'

python

Python 并行和条件：非类型对象没有属性'__dict'；,python,python-multiprocessing,Python,Python Multiprocessing,有关详细设置，请参阅。我想并行地创建许多类Toy的实例。然后我想将它们写入xml树 import itertools import pandas as pd import lxml.etree as et import numpy as np import sys import multiprocessing as mp def make_toys(df): l = [] for index, row in df.iterrows(): toys = [Toy

有关详细设置，请参阅。我想并行地创建许多类

Toy

的实例。然后我想将它们写入xml树

import itertools
import pandas as pd
import lxml.etree as et
import numpy as np
import sys
import multiprocessing as mp


def make_toys(df):
    """Build toys for every row of ``df``, discarding failed constructions.

    For each row, ``Toy(row)`` is attempted ``row['number']`` times; attempts
    that produce ``None`` are dropped.  Returns one flat list of the toys that
    were actually created, in row order.
    """
    created = []
    for _idx, row in df.iterrows():
        toys = [Toy(row) for _ in range(row['number'])]
        # Toy(...) may evaluate to None, so keep only real instances.
        created.extend([x for x in toys if x is not None])
    return created


class Toy(object):
    """A toy built from one table row; constructing it may yield ``None``."""

    def __new__(cls, *args, **kwargs):
        # Allocate an instance only when the random draw passes the threshold;
        # otherwise fall off the end and implicitly return None, in which case
        # __init__ is skipped and the caller receives None instead of a Toy.
        # NOTE(review): np.random.uniform() draws from [0, 1), so `<= 1` is
        # always true as written -- confirm the intended threshold.
        if np.random.uniform() <= 1:
            return super(Toy, cls).__new__(cls, *args, **kwargs)

    def __init__(self, row):
        # `id` stays None until set_id() assigns one.
        self.id = None
        self.type = row['type']

    def set_id(self, x):
        """Store ``x`` as this toy's id."""
        self.id = x

    def write(self, tree):
        """Append a ``toy`` sub-element carrying this toy's id and type to ``tree``."""
        et.SubElement(tree, "toy", attrib={'id': str(self.id), 'type': self.type})


if __name__ == "__main__":
    # Demo table: one row per toy type with the number of toys to attempt.
    table = pd.DataFrame({
        'type': ['a', 'b', 'c', 'd'],
        'number': [5, 4, 3, 10]})

    # Split the table into one chunk per worker process.
    n_cores = 2
    split_df = np.array_split(table, n_cores)

    # Build the toys in parallel; each worker returns the list for its chunk.
    p = mp.Pool(n_cores)
    pool_results = p.map(make_toys, split_df)
    p.close()
    p.join()
    # Flatten the per-chunk lists into a single list of toys.
    l = [a for L in pool_results for a in L]

    # Root element and the tree object used to serialise it.
    box = et.Element("box")
    box_file = et.ElementTree(box)

    # Number the toys sequentially (itertools.izip exists only on Python 2).
    for i, toy in itertools.izip(range(len(l)), l):
        Toy.set_id(toy, i)

    # List comprehension used purely for its side effect of writing each toy.
    [Toy.write(x, box) for x in l]

    box_file.write(sys.stdout, pretty_print=True)

我甚至不知道这意味着什么，也不知道如何避免。如果我像在

l=make_toys（table）

中那样单独完成这个过程，它在任何随机情况下都运行良好

另一个解决方案顺便说一句，我知道这可以通过将

__new__

方法放在一边，而将

make_toys()

重写为

def make_toys(df):
    """Build a binomially sampled number of toys for every row of ``df``.

    For each row the number of toys to construct is drawn from
    ``np.random.binomial(row['number'], 0.1)``; any construction that yields
    ``None`` is dropped.  Returns the flat list of created toys.
    """
    kept = []
    for _idx, row in df.iterrows():
        # Despite the original local name, this is a count, not a probability.
        n_toys = np.random.binomial(row['number'], 0.1)
        for _ in range(n_toys):
            toy = Toy(row)
            if toy is not None:
                kept.append(toy)
    return kept

但是我正在努力了解这个错误。

我想你已经发现了一个由

Toy

实例在通过多处理池的结果

队列时变为 None 所引起的令人惊讶的“陷阱”（gotcha）。

multiprocessing.Pool

使用

Queue.Queue

将子进程的结果传递回主进程。

将对象放入队列时，对象会先被 pickle 序列化，随后由后台线程将序列化后的数据刷新到底层管道。

虽然实际的序列化细节可能有所不同，但本质上，对

Toy

实例进行 pickle 序列化会得到如下字节流：

In [30]: import pickle

In [31]: pickle.dumps(Toy(table.iloc[0]))
Out[31]: "ccopy_reg\n_reconstructor\np0\n(c__main__\nToy\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'type'\np6\nS'a'\np7\nsS'id'\np8\nNsb."

请注意，字节流中记录了对象所属的模块和类：

__main__\nToy

类本身并没有被 pickle 序列化，只保存了对类名称的引用。

当字节流在管道的另一侧被反序列化（unpickle）时，Toy.__new__
被调用以实例化

Toy

的新实例。新对象的

__dict__

然后使用从字节流中反序列化得到的数据进行重构。当新对象为

None

时，它没有

__dict__

属性，因此引发AttributeError

因此，当

Toy

实例通过

队列

时，它可能在另一端变成

None

我相信这就是为什么使用

class Toy(object):
    def __new__(cls, *args, **kwargs):
        # Draw once; an instance is allocated only when the draw is <= 0.5.
        x = np.random.uniform() <= 0.5
        if x:
            return super(Toy, cls).__new__(cls, *args, **kwargs)
        # No instance is created on this path: log it, then implicitly return None.
        logger.info('Returning None')

如果在脚本中添加日志记录

import itertools
import pandas as pd
import lxml.etree as et
import numpy as np
import sys
import multiprocessing as mp
import logging
logger = mp.log_to_stderr(logging.INFO)

def make_toys(df):
    """Build toys for every row of ``df``, skipping failed constructions.

    ``Toy(row)`` is attempted ``row['number']`` times per row; attempts that
    evaluate to ``None`` are skipped.  Returns the flat list of created toys.
    """
    result = []
    for _idx, row in df.iterrows():
        for _ in range(row['number']):
            toy = Toy(row)
            # Toy(...) may evaluate to None; only keep real instances.
            if toy is not None:
                result.append(toy)
    return result


class Toy(object):
    """A toy built from one table row; ``__new__`` logs whenever it yields ``None``."""

    def __new__(cls, *args, **kwargs):
        # Draw once; an instance is allocated only when the draw is <= 0.97.
        x = np.random.uniform() <= 0.97
        if x:
            return super(Toy, cls).__new__(cls, *args, **kwargs)
        # No instance is created on this path: the log record shows which
        # process executed __new__, then None is returned implicitly.
        logger.info('Returning None')

    def __init__(self, row):
        # `id` stays None until set_id() assigns one.
        self.id = None
        self.type = row['type']

    def set_id(self, x):
        """Store ``x`` as this toy's id."""
        self.id = x

    def write(self, tree):
        """Append a ``toy`` sub-element carrying this toy's id and type to ``tree``."""
        et.SubElement(tree, "toy", attrib={'id': str(self.id), 'type': self.type})


if __name__ == "__main__":
    # Demo table: one row per toy type with the number of toys to attempt.
    table = pd.DataFrame({
        'type': ['a', 'b', 'c', 'd'],
        'number': [5, 4, 3, 10]})

    # Split the table into one chunk per worker process.
    n_cores = 2
    split_df = np.array_split(table, n_cores)

    # Build the toys in parallel; each worker returns the list for its chunk.
    p = mp.Pool(n_cores)
    pool_results = p.map(make_toys, split_df)
    p.close()
    p.join()
    # Flatten the per-chunk lists into a single list of toys.
    l = [a for L in pool_results for a in L]

    # Root element and the tree object used to serialise it.
    box = et.Element("box")
    box_file = et.ElementTree(box)

    # enumerate() yields the same (index, toy) pairs as
    # itertools.izip(range(len(l)), l) and also exists on Python 3.
    for i, toy in enumerate(l):
        toy.set_id(i)

    for x in l:
        x.write(box)

    box_file.write(sys.stdout, pretty_print=True)

请注意，日志消息来自MainProcess，而不是池工作者进程。因为

Returning None

消息来自

Toy.__new__

，这表明主进程调用了

Toy.__new__

。这证实了前面的说法：反序列化（unpickling）时会调用

Toy.__new__

并将

Toy

的实例转换为

None

这个故事的寓意是，对于要通过多处理池队列传递的

Toy

实例，

Toy.__new__

必须始终返回

Toy 的实例

。正如您所指出的，代码可以通过在

make_toys

中仅实例化所需数量的玩具来修复：

def make_toys(df):
    """Build a binomially sampled number of toys for every row of ``df``.

    The number of toys per row is drawn from
    ``np.random.binomial(row['number'], 0.1)``, so only the toys that are
    actually wanted get instantiated.  Returns the flat list of created toys.
    """
    result = []
    for _idx, row in df.iterrows():
        # Despite the original local name, this is a count, not a probability.
        n_toys = np.random.binomial(row['number'], 0.1)
        for _ in range(n_toys):
            result.append(Toy(row))
    return result

顺便说一下，使用

Toy.write(x, box)

当

x 是

Toy

的一个实例时，这种写法并不规范。首选的方法是使用

x.write(box)

同样，使用

toy.set_id(i)

而不是

Toy.set_id(toy, i)

谢谢你的风格提示；我对Python很陌生。但是我认为行

[x for x in toys if x is not None]

删除了那些非对象。而且，将机会移动到

for

循环将是次优的，特别是因为在我的实际代码中

__init__

做了很多。如果我只是想把这个东西扔掉，我宁愿不经历这些。哦，天哪，我错过了。None肯定来自其他地方。我想你已经发现了一个令人惊讶的“gotcha”，这是由一些

Toy

实例在通过多处理池的结果队列时变为

None

引起的。我不知道前面的答案发生了什么，但是请注意，我正在从对象列表中删除

None

元素。

[INFO/MainProcess] Returning None

def make_toys(df):
    """Build a binomially sampled number of toys for every row of ``df``.

    The number of toys per row is drawn from
    ``np.random.binomial(row['number'], 0.1)``.  Returns the flat list of
    created toys.
    """
    toys = []
    for _idx, row in df.iterrows():
        # Despite the original local name, this is a count, not a probability.
        count = np.random.binomial(row['number'], 0.1)
        toys += [Toy(row) for _ in range(count)]
    return toys

x.write(box)