C# 线程安全的数据缓冲区,用于批量插入受控大小的数据

C# 线程安全的数据缓冲区,用于批量插入受控大小的数据。标签:c#, .net, parallel-processing, task-parallel-library, parallel-extensions

我有一个模拟,生成的数据必须保存到数据库

ParallelLoopResult res = Parallel.For(0, 1000000, options, (r, state) =>
{
    // Generate one data set per index...
    ComplexDataSet cds = GenerateData(r);

    // ...and write it to the database immediately: one tiny transaction per
    // iteration, which is the inefficiency this question is about.
    SaveDataToDatabase(cds);

});
模拟会生成大量数据,因此先生成数据,然后将其保存到数据库(高达1GB的数据)是不现实的,而将其逐个保存到数据库(太小的事务量不现实)也是没有意义的。我想将它们作为一个可控大小的批插入(比如一次提交100个)插入到数据库中

然而,我认为我对并行计算的了解还没有那么理论化。我提出了这个(正如你所看到的,它有很大的缺陷):

DataBuffer buffer = new DataBuffer(...);
ParallelLoopResult res = Parallel.For(0, 10000000, options, (r, state) =>
{
    ComplexDataSet cds = GenerateData(r);
    buffer.SaveDataToBuffer(cds, i == r - 1);
});

public class DataBuffer
{
    int count = 0;
    int limit = 100

    object _locker = new object();

    ConcurrentQueue<ConcurrentBag<ComplexDataSet>> ComplexDataBagQueue{ get; set; }

    public void SaveDataToBuffer(ComplexDataSet data, bool isfinalcycle)
    {
        lock (_locker)
        {
            if(count >= limit)
            {
                ConcurrentBag<ComplexDataSet> dequeueRef;
                if(ComplexDataBagQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }

                _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                ComplexDataSetsQueue.Enqueue(_lastItemRef);
                count = 1;
            }
            else
            {
                // 第一次
                if(_lastItemRef == null)
                {
                    _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                    ComplexDataSetsQueue.Enqueue(_lastItemRef);
                    count = 1;
                }
                // 如果缓冲区未满
                else
                {
                    _lastItemRef.Add(data);
                    count++;
                }
            }

            if(isfinalcycle)
            {
                // 将所有尚未提交的内容提交
                ConcurrentBag<ComplexDataSet> dequeueRef;
                while (ComplexDataSetsQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }
            }
        }
    }

    public void Commit(ConcurrentBag<ComplexDataSet> data)
    {
        // 将数据提交到数据库……这应该在另一个线程中进行吗?
    }
}
如您所见,我使用队列创建缓冲区,然后手动决定何时提交。然而,我有一种强烈的感觉,这不是很好的解决我的问题的办法。首先,我不确定我是否做得对。其次,我不确定这是否是完全线程安全的(或者根本不安全)

请您看一下,并评论一下我应该采取哪些不同的做法?或者是否有更好的方法(使用某种生产者-消费者技术或其他方法)

谢谢并致以最良好的祝愿,
D.

与其增加软件的复杂性,不如考虑简化。您可以将代码重构为三个部分:

  • 排队的工人

    这是并行并发生成的数据。因为它会进行一些繁重的计算并生成复杂的数据集

  • 实际队列

    一个并发队列,用于存储[1]中的结果—如此多的复杂数据集。在这里,我假设ComplexDataSet的一个实例实际上并不真正消耗资源,而且相当轻。只要队列是并发的,它就支持并行的“插入”和“删除”

  • 下队的工人

    从处理队列[2]中获取ComplexDataSet的一个实例并将其放入并发包(或其他存储)中的代码。一旦包中有N个您阻止的项目,停止排队,将包中的内容刷新到数据库中并清除它。最后,取消阻止并恢复排队

  • 下面是一些元代码(它仍然可以编译,但需要改进)

    [1]

    // [1] - 负责生成复杂数据集并将其添加到处理队列的类
    class EnqueueWorker
    {
        // 生成数据并添加到队列
        internal void ParrallelEnqueue(ConcurrentQueue<ComplexDataSet> resultQueue)
        {
            Parallel.For(1, 10000, (i) =>
            {
                ComplexDataSet cds = GenerateData(i);
                resultQueue.Enqueue(cds);
            });
        }

        // 生成数据
        ComplexDataSet GenerateData(int i)
        {
            return new ComplexDataSet();
        }
    }

    [3]

    // [3] 这个类从处理队列中获取数据集,并在生成 N 个项目后刷新结果
    class DequeueWorker
    {
        // 保存已出列、待处理数据的缓冲区
        private static ConcurrentBag<ComplexDataSet> buffer;

        // 用于偶尔将数据刷新到数据库的锁
        private static object syncRoot = new object();

        // 从处理队列中获取项目并将其添加到内部缓冲存储器中
        // 一旦缓冲区满了,就将其刷新到数据库中
        internal void ParrallelDequeue(ConcurrentQueue<ComplexDataSet> resultQueue)
        {
            buffer = new ConcurrentBag<ComplexDataSet>();
            int N = 100;

            Parallel.For(1, 10000, (i) =>
            {
                // 尝试出列
                ComplexDataSet cds = null;
                var spinWait = new SpinWait();

                while (cds == null)
                {
                    resultQueue.TryDequeue(out cds);
                    spinWait.SpinOnce();
                }

                // 添加到缓冲区
                buffer.Add(cds);

                // 如果需要,刷新到数据库
                if (buffer.Count == N)
                {
                    lock (syncRoot)
                    {
                        IEnumerable<ComplexDataSet> data = buffer.ToArray();

                        // 将数据刷新到数据库

                        buffer = new ConcurrentBag<ComplexDataSet>();
                    }
                }
            });
        }
    }
    DataBuffer buffer = new DataBuffer(...);
    
    // NOTE(review): 'i' below is never declared -- presumably intended to
    // flag the final iteration.  Even if fixed, Parallel.For runs indices
    // out of order, so "r is the last index" is not a reliable signal that
    // all other iterations have completed.
    ParallelLoopResult res = Parallel.For(0, 10000000, options, (r, state) =>
    {
        ComplexDataSet cds = GenerateData(r);
    
        buffer.SaveDataToBuffer(cds, i == r - 1);
    
    });
    
    /// <summary>
    /// Thread-safe buffer that groups incoming items into bags of
    /// <c>limit</c> items and commits full bags to the database.
    /// All mutation happens under a single lock, so the class is safe to
    /// call from Parallel.For bodies.
    /// </summary>
    public class DataBuffer
    {
        // Number of items accumulated in the current (still-filling) bag.
        int count = 0;

        // Batch size: a bag is eligible for commit once it holds this many
        // items.  (BUG FIX: the original was missing the semicolon here.)
        int limit = 100;

        object _locker = new object();

        // Reference to the bag currently being filled.
        // (BUG FIX: this field was used but never declared in the original.)
        ConcurrentBag<ComplexDataSet> _lastItemRef;

        // Queue of batches awaiting commit.
        // (BUG FIX: the original referred to this member under two different
        // names -- ComplexDataBagQueue and ComplexDataSetsQueue -- and never
        // initialized it; unified and initialized here.)
        ConcurrentQueue<ConcurrentBag<ComplexDataSet>> ComplexDataBagQueue =
            new ConcurrentQueue<ConcurrentBag<ComplexDataSet>>();

        /// <summary>
        /// Adds one item to the buffer.  When the current bag is full, the
        /// oldest queued batch is committed and a fresh bag is started.
        /// When <paramref name="isfinalcycle"/> is true, every remaining
        /// batch is committed.
        /// </summary>
        public void SaveDataToBuffer(ComplexDataSet data, bool isfinalcycle)
        {
                lock (_locker)
                {
                    if(count >= limit)
                    {
                        // Current bag is full: commit the oldest queued batch...
                        ConcurrentBag<ComplexDataSet> dequeueRef;
                        if(ComplexDataBagQueue.TryDequeue(out dequeueRef))
                        {
                            Commit(dequeueRef);
                        }

                        // ...then start a fresh bag seeded with the new item.
                        _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                        ComplexDataBagQueue.Enqueue(_lastItemRef);
                        count = 1;
                    }
                    else
                    {
                        // First item ever: create the initial bag.
                        if(_lastItemRef == null)
                        {
                            _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                            ComplexDataBagQueue.Enqueue(_lastItemRef);
                            count = 1;
                        }
                        // Buffer isn't full yet: keep filling the current bag.
                        else
                        {
                            _lastItemRef.Add(data);
                            count++;
                        }
                    }

                    if(isfinalcycle)
                    {
                        // Commit everything that hasn't been committed yet.
                        ConcurrentBag<ComplexDataSet> dequeueRef;
                        while (ComplexDataBagQueue.TryDequeue(out dequeueRef))
                        {
                            Commit(dequeueRef);
                        }
                    }
                }
        }

        public void Commit(ConcurrentBag<ComplexDataSet> data)
        {
            // Commit data to database.  NOTE(review): doing this on a
            // dedicated writer thread would keep producers from blocking on
            // the DB round-trip while holding _locker.
        }
    }
    
    // [1] - Produces complex data sets in parallel and pushes each one onto
    // the shared processing queue.
    class EnqueueWorker
    {
        // Generate data in parallel and enqueue every result.
        internal void ParrallelEnqueue(ConcurrentQueue<ComplexDataSet> resultQueue)
        {
            Parallel.For(1, 10000, (index) =>
            {
                resultQueue.Enqueue(GenerateData(index));
            });
        }

        // Produce a single data set for the given index.
        ComplexDataSet GenerateData(int i)
        {
            return new ComplexDataSet();
        }
    }
    
    //[3] This guy takes sets from the processing queue and flushes results
    // to the database once N items have accumulated.
    class DequeueWorker
    {
        //buffer that holds processed dequeued data
        private static ConcurrentBag<ComplexDataSet> buffer;

        //lock to flush the data to the db once in a while
        private static object syncRoot = new object();

        //take item from processing queue and add it to internal buffer storage
        //once buffer is full - flush it to the database
        internal void ParrallelDequeue(ConcurrentQueue<ComplexDataSet> resultQueue)
        {
            buffer = new ConcurrentBag<ComplexDataSet>();
            int N = 100;

            Parallel.For(1, 10000, (i) =>
            {
                // Spin until an item becomes available.
                // NOTE(review): this spins forever if the producer finishes
                // early; a BlockingCollection would give clean shutdown.
                ComplexDataSet cds = null;

                var spinWait = new SpinWait();

                while (cds == null)
                {
                    resultQueue.TryDequeue(out cds);
                    spinWait.SpinOnce();
                }

                //add to buffer
                buffer.Add(cds);

                // BUG FIX: the original tested Count == N; with several
                // threads adding concurrently the count can jump past N
                // (e.g. 99 -> 101) so that no thread ever observes exactly N
                // and the buffer is never flushed.  Test >= N and re-check
                // under the lock so only one thread performs the flush.
                if (buffer.Count >= N)
                {
                    lock (syncRoot)
                    {
                        if (buffer.Count >= N)
                        {
                            IEnumerable<ComplexDataSet> data = buffer.ToArray();

                            // flush data to database

                            buffer = new ConcurrentBag<ComplexDataSet>();
                        }
                    }
                }

            });
        }        
    }
    
    class ComplexDataSet { } // placeholder for the simulation's real payload type
    
    class Program
    {
        // Processing queue - [2].  Single shared instance for the whole app;
        // the enqueue worker fills it and the dequeue worker drains it.
        private static ConcurrentQueue<ComplexDataSet> processingQueue;

        static void Main(string[] args)
        {
            // create new processing queue - single instance for whole app
            processingQueue = new ConcurrentQueue<ComplexDataSet>();

            //enqueue worker
            Task enqueueTask = Task.Factory.StartNew(() =>
                {
                    EnqueueWorker enqueueWorker = new EnqueueWorker();
                    enqueueWorker.ParrallelEnqueue(processingQueue);
                });

            //dequeue worker
            Task dequeueTask = Task.Factory.StartNew(() =>
            {
                DequeueWorker dequeueWorker = new DequeueWorker();
                dequeueWorker.ParrallelDequeue(processingQueue);
            });

            // BUG FIX: the original returned immediately, so Main (and the
            // process) could exit before either worker finished.
            Task.WaitAll(enqueueTask, dequeueTask);
        }
    }
    
    int total = 10000000;
    int step = 1000;
    
    // Partition the index space into chunks of 'step' items; each parallel
    // iteration generates one chunk locally and commits it as a single batch.
    Parallel.For(0, total / step, (r, state) =>
    {
        // BUG FIX: the original read 'int start = r * start;', which uses
        // the variable in its own initializer; the chunk offset is r * step.
        int start = r * step;
        int end = start + step;
    
        ComplexDataSet[] result = new ComplexDataSet[step];
    
        for (int i = start; i < end; i++)
        {
            result[i - start] = GenerateData(i);
        }
    
        Commit(result);
    });
    
    // Writes one batch of generated data to the database.  A fresh
    // connection is opened per batch; ADO.NET connection pooling makes
    // this cheap.
    private void Commit(ComplexDataSet[] data)
    {
        using (SqlConnection connection = new SqlConnection("connection string..."))
        {
            connection.Open();

            // insert your data here...
        }
    }
    
    // Thread-local variant: the Parallel.For overload with a thread-local
    // state factory gives each worker thread its own ThreadState (private
    // DB connection + queue), so no cross-thread locking is needed.  The
    // final delegate disposes each state, flushing whatever remains.
    Parallel.For(0, 10000000, () => new ThreadState(),
        (i, loopstate, threadstate) =>
    {
        ComplexDataSet data = GenerateData(i);
    
        // Buffers locally; ThreadState commits every 100 items (see Add).
        threadstate.Add(data);
    
        return threadstate;
    }, threadstate => threadstate.Dispose());
    
    // Per-thread commit state: a private database handle plus a local queue
    // that is flushed every 100 items and once more on Dispose.
    sealed class ThreadState : IDisposable
    {
        // NOTE(review): declared as IDisposable but called via db.Write(...)
        // below, and never assigned -- this is meta-code; the real type must
        // expose Write and be initialized in the constructor.
        readonly IDisposable db;
        readonly Queue<ComplexDataSet> queue = new Queue<ComplexDataSet>();
    
        public ThreadState()
        {
            // initialize db with a private MongoDb connection.
        }
    
        // Buffer one item; commit automatically once 100 are queued.
        public void Add(ComplexDataSet cds)
        {
            queue.Enqueue(cds);
    
            if(queue.Count == 100)
            {
                Commit();
            }
        }
    
        // Write the buffered batch and reset the queue.
        void Commit()
        {
            db.Write(queue);
            queue.Clear();
        }
    
        // Flush any partial batch, then release the connection -- the
        // finally block guarantees db is disposed even if the last Commit
        // throws.
        public void Dispose()
        {
            try
            {
                if(queue.Count > 0)
                {
                    Commit();
                }
            }
            finally
            {
                db.Dispose();
            }
        }
    }
    
    // Specify a maximum of 1000 items in the collection so that we don't
    // run out of memory if we get data faster than we can commit it.
    // Add() will wait (block the producer) if it is full.
    
    BlockingCollection<ComplexDataSet> commits =
        new BlockingCollection<ComplexDataSet>(1000);
    
    // NOTE(review): 'db' is not defined in this snippet -- meta-code; it
    // stands for whatever database writer you use.
    Task consumer = Task.Factory.StartNew(() =>
        {
            // This is the consumer.  It processes the
            // "commits" queue until it signals completion.
    
            while(!commits.IsCompleted)
            {
                ComplexDataSet cds;
    
                // Timeout of -1 will wait for an item or IsCompleted == true.
    
                if(commits.TryTake(out cds, -1))
                {
                    // Got at least one item, write it.
                    db.Write(cds);
    
                    // Continue dequeuing until the queue is empty, where it will
                    // timeout instantly and return false, or until we've dequeued
                    // 100 items.
    
                    for(int i = 1; i < 100 && commits.TryTake(out cds, 0); ++i)
                    {
                        db.Write(cds);
                    }
    
                    // Now that we're waiting for more items or have dequeued 100
                    // of them, commit.  More items can continue to be added to
                    // the queue by other threads while this commit is processing.
    
                    db.Commit();
                }
            }
        }, TaskCreationOptions.LongRunning);
    
    try
    {
        // This is the producer.
    
        Parallel.For(0, 1000000, i =>
            {
                ComplexDataSet data = GenerateData(i);
                commits.Add(data);
            });
    }
    finally // put in a finally to ensure the task closes down.
    {
        commits.CompleteAdding(); // marks IsAddingCompleted; IsCompleted becomes true once drained.
        consumer.Wait(); // wait for task to finish committing all the items.
    }