Python: Asynchronous Cassandra Inserts

The problem with the Cassandra Python driver is that the "future" objects it returns have their callbacks attached via side effects. That means these futures are not composable the way futures in JavaScript or Scala are. I'm wondering whether there is a pattern that can turn the non-composable futures into composable ones (preferably without leaking memory).
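
To make "composable" concrete: something like the bridge below is what I have in mind. It is only a sketch; to_composable is a name I made up, and it leans on ResponseFuture.add_callbacks passing the result rows to the first function and the exception to the second:

from concurrent.futures import Future

def to_composable(response_future):
    # Adapt the driver's ResponseFuture into a concurrent.futures.Future,
    # which can then be chained with add_done_callback().
    py_future = Future()
    response_future.add_callbacks(py_future.set_result, py_future.set_exception)
    return py_future

# e.g. to_composable(session.execute_async(query)).add_done_callback(on_done)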

Looking at the Cassandra Python driver section of the DataStax documentation, I saw an example of how they create a series of continuously chained insert queries, i.e. a slightly more elaborate version of this pattern:

sentinel = object()  # session, query and log are assumed to be defined elsewhere

def insert_next(previous_result=sentinel):
    if previous_result is not sentinel:
        if isinstance(previous_result, BaseException):
            log.error("Error on insert: %r", previous_result)

    future = session.execute_async(query)
    # NOTE: this callback also handles errors
    future.add_callbacks(insert_next, insert_next)

As a toy example it works very well: as soon as one query completes, another equivalent query is executed. This scheme lets them reach about 7k writes/sec, whereas a version that does not try to "chain" the callbacks tops out at roughly 2k writes/sec.
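
If I read the example right (this is an assumption on my part), the throughput comes from keeping several of these chains in flight at once rather than from a single chain, roughly like this, where num_chains is a made-up knob and session, query and insert_next are as above:

# Assumption: scale the pattern by starting several independent chains,
# each keeping one request in flight at a time.
num_chains = 16

for _ in range(num_chains):
    future = session.execute_async(query)
    future.add_callbacks(insert_next, insert_next)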


I've been trying to come up with some mechanism that would let me recapture that exact pattern, but so far without success. Has anyone come up with something along these lines?

I spent a little time thinking about how to wrap the futures up in some fashion:

import logging
from Queue import Queue      # the module is named 'queue' in Python 3
from threading import Event  # hmm... is this needed?


insert_logger = logging.getLogger('async_insert')
insert_logger.setLevel(logging.INFO)

def handle_err(err):
  insert_logger.warning('Failed to insert due to %s', err)


# Designed to work in a high-write environment. Chained callbacks for best performance,
# and fast fail/stop when an error is encountered. The next insert should re-up the writing.
# Potential loss of the failed write. Some guarantee on preservation of write order.
class CappedQueueInserter(object):
  def __init__(self, session, max_count=0):
    self.__queue = Queue(max_count)
    self.__session = session
    self.__started = Event()

  @property
  def started(self):
    return self.__started.is_set()

  def insert(self, bound_statement):
    if not self.started:
      self._begin(bound_statement)
    else:
      self._enqueue(bound_statement)

  def _begin(self, bound_statement):
    def callback(previous_result=None):
      # the driver passes the previous result to the callback; it is ignored here
      try:
        bound = self.__queue.get(True)  # block until an item is added to the queue
        future = self.__session.execute_async(bound)
        future.add_callbacks(callback, handle_err)
      except Exception:
        self.__started.clear()

    self.__started.set()
    future = self.__session.execute_async(bound_statement)
    future.add_callbacks(callback, handle_err)

  def _enqueue(self, bound_statement):
    self.__queue.put(bound_statement, True)


# Separate the binding of insert statements from the insertion loop.
class InsertEnqueue(object):
  def __init__(self, prepared_query, insert, consistency_level=None):
    self.__statement = prepared_query
    self.__level = consistency_level
    self.__sink = insert

  def insert(self, *args):
    bound = self.bind(*args)
    self.__sink.insert(bound)

  @property
  def consistency_level(self):
    return self.__level or self.__statement.consistency_level

  @consistency_level.setter
  def consistency_level(self, value):
    # the setter must share the property's name for assignment to work
    if value:
      self.__level = value

  def bind(self, *args):
    bound = self.__statement.bind(*args)
    bound.consistency_level = self.consistency_level

    return bound

A Queue and an Event, combined to trigger the kick-off. Assuming that a write will "eventually" happen, this should work.
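
For completeness, this is roughly how I picture the two classes being wired together. Only a sketch: the contact point, keyspace, table and queue size are placeholders I made up. Note that PreparedStatement.bind takes the values as a single sequence, which is why the whole tuple is handed to insert() as one argument.

import uuid

from cassandra.cluster import Cluster

cluster = Cluster(['127.0.0.1'])          # placeholder contact point
session = cluster.connect('my_keyspace')  # placeholder keyspace

# placeholder table: events(id uuid PRIMARY KEY, payload text)
prepared = session.prepare('INSERT INTO events (id, payload) VALUES (?, ?)')

inserter = CappedQueueInserter(session, max_count=1000)
enqueue = InsertEnqueue(prepared, inserter)

for i in range(10000):
    # the tuple of values is passed as one argument, matching PreparedStatement.bind
    enqueue.insert((uuid.uuid4(), 'payload-%d' % i))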
