
Python multithreading Manager.list(): how to access the data correctly


I'm building a class that starts in its own process and pushes data to a database in batches of a given size. The class uses a Manager.list to receive the data. I assumed this is a common pattern, with database access happening in a separate process, but I couldn't find a suitable library, so I figured I'd roll my own.

Internally I use a threading.Timer to wake up my database worker and check the shared queue. However, when it wakes up there is nothing in the queue, even though items were put there. Am I using Manager.list incorrectly?

Source code:

import random
from threading import Timer
import threading

from sqlalchemy import *
from multiprocessing import Process, Manager
from util.config import get_connection


def __convert_to_key(connection, table):
    return "{}.{}".format(connection.name, table.name)


class ConnectionWorker():
    __batch_size = 1000
    __batch_insert_queue = None
    __manager = Manager()
    __wait_interval = 5.0
    __finish = False
    __connection = None
    __table = None
    __timer = None
    finished = False

    def __init__(self, connection, table):
        self.__lock = threading.RLock()
        self.__connection = connection
        self.__table = table
        p = Process(target=self.__insert_data)
        p.start()

    def get_batch_insert_queue(self):
        self.__lock.acquire()
        try:
            if self.__batch_insert_queue is None:
                self.__batch_insert_queue = self.__manager.list()
            return self.__batch_insert_queue
        finally:
            self.__lock.release()

    def __insert_data(self):
        print("__insert_data, the queue is {}".format(len(self.get_batch_insert_queue())))
        q = self.get_batch_insert_queue()

        #push everything now if we have been told to finish
        if self.__finish:
            print("__finish flag has been set")
            self.__connection.execute(self.__table.insert().values(q))
            self.finished = True
            return

        #if there is nothing to do then just sleep
        if len(q) == 0:
            print("The queue is empty, sleeping")
            self.__timer = Timer(self.__wait_interval, self.__insert_data)
            self.__timer.start()
            self.__timer.join()

        values_to_insert = []
        while len(q) > 0 and len(values_to_insert) < self.__batch_size:
            values_to_insert.append(q.pop)
        print("Inserting {} values".format(len(values_to_insert)))
        self.__connection.execute(self.__table.insert().values(values_to_insert))

        #don't sleep if the queue has more work to do
        if len(q) >= self.__batch_size:
            print("Not sleeping, there is more work to be done, {} items".format(len(q)))
            self.__insert_data()
        else:
            print("Sleeping")
            self.__timer = Timer(self.__wait_interval, self.__insert_data).start()
            self.__timer.start()
            self.__timer.join()

    def finish(self):
        print("Setting finish to true")
        self.__finish = True

#test data
if __name__ == "__main__":
    #create the db and get metadata
    conn = get_connection()
    query = "DROP TABLE IF EXISTS tmp_test"
    try:
        conn.execute(query)
    except:
        pass
    query = """CREATE TABLE tmp_test (
    value bigint DEFAULT NULL
    ) ENGINE=InnoDB;"""
    conn.execute(query)

    metadata = MetaData()
    metadata.reflect(bind=conn)
    tbl = metadata.tables["tmp_test"]

    c = ConnectionWorker(conn, tbl)
    q = c.get_batch_insert_queue()
    for item in random.sample(xrange(1, 1000000000), 100000):
        q.append(item)
    print("The queue is {}".format(len(q)))
    print("The batch queue is {}".format(len(c.get_batch_insert_queue())))
    import time
    time.sleep(10)
    c.finish()

    while not c.finished:
        time.sleep(1)
The output:

__insert_data, the queue is 0
The queue is empty, sleeping
The queue is 100000
The batch queue is 100000
__insert_data, the queue is 0
The queue is empty, sleeping
__insert_data, the queue is 0
The queue is empty, sleeping
Setting finish to true
__insert_data, the queue is 0
The queue is empty, sleeping

The first queue being empty makes sense, since the object is just initializing, but the next two look like they should contain items. It's also unclear to me why, when finish is set to True, the worker skips right past the self.__finish check; I would expect it to print "__finish flag has been set".

Comments are welcome, as are pointers to a library that may already handle all of this by default.

Data is never implicitly shared across processes. Two consequences:

the Manager.list created in your main program has nothing to do with the Manager.list created in your worker process; and

the self.__finish attribute in your main program has nothing to do with the self.__finish attribute in your worker process (see the sketch below).
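
Here is a minimal standalone demonstration of that second point (my sketch, not code from the answer): an attribute flipped in the parent after the child process has started is never seen by the child.

import time
from multiprocessing import Process

class Flag(object):
    finish = False

    def watch(self):
        # the child operates on its own copy of the Flag object
        for _ in range(3):
            print("child sees finish = {}".format(self.finish))
            time.sleep(0.5)

if __name__ == "__main__":
    f = Flag()
    p = Process(target=f.watch)
    p.start()
    time.sleep(0.1)
    f.finish = True  # changes the parent's copy only
    print("parent set finish = {}".format(f.finish))
    p.join()  # the child prints False every time

The same thing happens to the list in the question's code: the parent and the child each call get_batch_insert_queue on their own copy of the object, so each one creates and reads its own, unrelated Manager.list.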

You really should back off and try simpler code until these things make more sense to you. The usual way to do this is shown below; I've thrown away all the class and method cruft so it's easy to see what's essential here. Note that no extra threads or sleeps are needed:


# shared data must be passed, even mp data structures
def worker(q):
    values_to_insert = []
    while True:
        item = q.get() # no need to sleep - blocks until data is ready
        if item is None:
            break
        values_to_insert.append(item)
        if len(values_to_insert) >= 39: # whatever - your `__batch_size`
            print "processing", values_to_insert
            values_to_insert = []
    # deal with any leftovers
    if values_to_insert:
        print "processing", values_to_insert

if __name__ == "__main__":
    import multiprocessing as mp
    import random

    q = mp.Queue(100)  # bounded queue
    proc = mp.Process(target=worker, args=(q,))
    proc.start()
    for item in random.sample(xrange(1, 1000000000), 100000):
        # will block if q has more than 100 items; blocking
        # waits for worker to catch up
        q.put(item)
    q.put(None)  # tell worker we're done
    proc.join()

Comments:

Doesn't c = ConnectionWorker(conn, tbl) start a new process, and doesn't q = c.get_batch_insert_queue() get a proxy for the list for the main program's thread to use?

No offense intended, but your code is too elaborate for me to spend the time untangling it in detail. Instead I gave you far simpler code that actually works ;-) You don't have to accept it. If you want to untangle it on your own, great! But please take the advice to heart and cut your code back to the bare minimum. There is a lot of scary stuff in there, like creating a Manager as a side effect of compiling the class definition in the main program, and your code won't even run on Windows as is.
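
If you do want to keep the original Manager.list design, here is a minimal sketch (mine, not code from the thread) of how it could be wired up correctly: create the shared structures once in the parent and pass them explicitly to the Process, with a multiprocessing.Event standing in for the finish flag.

import time
from multiprocessing import Process, Manager, Event

def worker(shared_list, finish):
    # drain the shared list in batches until told to finish
    while not finish.is_set() or len(shared_list) > 0:
        batch = []
        while len(shared_list) > 0 and len(batch) < 1000:  # 1000 = the question's __batch_size
            batch.append(shared_list.pop(0))
        if batch:
            print("inserting {} values".format(len(batch)))  # the db insert would go here
        else:
            time.sleep(0.1)  # nothing to do yet

if __name__ == "__main__":
    manager = Manager()
    shared_list = manager.list()  # the proxy can be passed to a child process
    finish = Event()
    p = Process(target=worker, args=(shared_list, finish))
    p.start()
    for i in range(10000):
        shared_list.append(i)
    finish.set()  # the child sees this, unlike a plain attribute
    p.join()

The bounded mp.Queue above is still the better tool, since it blocks instead of polling; this sketch only shows how the proxy-and-flag approach from the question could be arranged so that both processes actually share the same objects.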