C# 线程安全的数据缓冲区,用于批量插入受控大小的数据

我有一个模拟,生成的数据必须保存到数据库:
ParallelLoopResult res = Parallel.For(0, 1000000, options, (r, state) =>
{
ComplexDataSet cds = GenerateData(r);
SaveDataToDatabase(cds);
});
模拟会生成大量数据,因此先生成数据,然后将其保存到数据库(高达1GB的数据)是不现实的,而将其逐个保存到数据库(太小的事务量不现实)也是没有意义的。我想将它们作为一个可控大小的批插入(比如一次提交100个)插入到数据库中
然而,我认为我对并行计算的了解还没有那么理论化。我提出了这个(正如你所看到的,它有很大的缺陷):
DataBuffer buffer = new DataBuffer(...);
ParallelLoopResult res = Parallel.For(0, 10000000, options, (r, state) =>
{
    ComplexDataSet cds = GenerateData(r);
    buffer.SaveDataToBuffer(cds, i == r - 1);
});
public class DataBuffer
{
    int count = 0;
    int limit = 100
    object _locker = new object();
    ConcurrentQueue<ConcurrentBag<ComplexDataSet>> ComplexDataBagQueue{ get; set; }

    public void SaveDataToBuffer(ComplexDataSet data, bool isfinalcycle)
    {
        lock (_locker)
        {
            if(count >= limit)
            {
                ConcurrentBag<ComplexDataSet> dequeueRef;
                if(ComplexDataBagQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }
                _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                ComplexDataSetsQueue.Enqueue(_lastItemRef);
                count = 1;
            }
            else
            {
                // First time
                if(_lastItemRef == null)
                {
                    _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                    ComplexDataSetsQueue.Enqueue(_lastItemRef);
                    count = 1;
                }
                // If buffer isn't full
                else
                {
                    _lastItemRef.Add(data);
                    count++;
                }
            }
            if(isfinalcycle)
            {
                // Commit everything that hasn't been committed yet
                ConcurrentBag<ComplexDataSet> dequeueRef;
                while (ComplexDataSetsQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }
            }
        }
    }

    public void Commit(ConcurrentBag<ComplexDataSet> data)
    {
        // Commit data to database..should this be somehow in another thread or something ?
    }
}
如您所见,我使用队列创建缓冲区,然后手动决定何时提交。然而,我有一种强烈的感觉,这不是很好的解决我的问题的办法。首先,我不确定我是否做得对。其次,我不确定这是否是完全线程安全的(或者根本不安全)
请您看一下,并评论一下我应该采取哪些不同的做法?或者是否有更好的方法(使用某种生产者-消费者技术或其他方法)
谢谢并致以最良好的祝愿,
D.

与其增加软件的复杂性,不如考虑简化。您可以将代码重构为三个部分:
// [1] - Class is responsible for generating complex data sets and
// adding them to processing queue
class EnqueueWorker
{
    //generate data and add to queue
    internal void ParrallelEnqueue(ConcurrentQueue<ComplexDataSet> resultQueue)
    {
        Parallel.For(1, 10000, (i) =>
        {
            ComplexDataSet cds = GenerateData(i);
            resultQueue.Enqueue(cds);
        });
    }
    //generate data
    ComplexDataSet GenerateData(int i)
    {
        return new ComplexDataSet();
    }
}

//[3] This guy takes sets from the processing queue and flush results when
// N items have been generated
class DequeueWorker
{
    //buffer that holds processed dequeued data
    private static ConcurrentBag<ComplexDataSet> buffer;
    //lock to flush the data to the db once in a while
    private static object syncRoot = new object();
    //take item from processing queue and add it to internal buffer storage
    //once buffer is full - flush it to the database
    internal void ParrallelDequeue(ConcurrentQueue<ComplexDataSet> resultQueue)
    {
        buffer = new ConcurrentBag<ComplexDataSet>();
        int N = 100;
        Parallel.For(1, 10000, (i) =>
        {
            //try dequeue
            ComplexDataSet cds = null;
            var spinWait = new SpinWait();
            while (cds == null)
            {
                resultQueue.TryDequeue(out cds);
                spinWait.SpinOnce();
            }
            //add to buffer
            buffer.Add(cds);
            //flush to database if needed
            if (buffer.Count == N)
            {
                lock (syncRoot)
                {
                    IEnumerable<ComplexDataSet> data = buffer.ToArray();
                    // flush data to database
                    buffer = new ConcurrentBag<ComplexDataSet>();
                }
            }
        });
    }
}
// Total iteration count, named so the "final iteration" flag below can be
// computed without repeating the magic number.
int total = 10000000;
DataBuffer buffer = new DataBuffer(...);
ParallelLoopResult res = Parallel.For(0, total, options, (r, state) =>
{
    ComplexDataSet cds = GenerateData(r);
    // Bug fix: the original tested "i == r - 1", but no "i" exists in this
    // scope; the intent was to flag the last index so the buffer flushes.
    // NOTE(review): Parallel.For does not guarantee the highest index runs
    // last, so items buffered after this flush can still be missed — a
    // producer/consumer design is more robust for the final flush.
    buffer.SaveDataToBuffer(cds, r == total - 1);
});
// Thread-safe buffer that groups incoming items into fixed-size batches
// and commits completed batches to the database.
public class DataBuffer
{
    // Number of items in the batch currently being filled.
    int count = 0;
    // Batch size: a commit is triggered once this many items have accumulated.
    // (Original had a missing semicolon here: "int limit = 100".)
    int limit = 100;
    // Guards all mutable state below. Because every access happens under this
    // lock, the Concurrent* collection types are not strictly required.
    object _locker = new object();
    // Batch currently being filled (tail of the queue). The original used
    // this field without ever declaring it.
    ConcurrentBag<ComplexDataSet> _lastItemRef;
    // FIFO of batches awaiting commit. The original referred to this property
    // under two different names (ComplexDataBagQueue / ComplexDataSetsQueue)
    // and never initialized it; both are fixed here.
    ConcurrentQueue<ConcurrentBag<ComplexDataSet>> ComplexDataSetsQueue { get; set; }

    public DataBuffer()
    {
        ComplexDataSetsQueue = new ConcurrentQueue<ConcurrentBag<ComplexDataSet>>();
    }

    // Adds one item. When the current batch has reached `limit`, the oldest
    // queued batch is committed and a new batch is started with `data`.
    // `isfinalcycle` forces every batch still queued to be committed.
    public void SaveDataToBuffer(ComplexDataSet data, bool isfinalcycle)
    {
        lock (_locker)
        {
            if(count >= limit)
            {
                ConcurrentBag<ComplexDataSet> dequeueRef;
                if(ComplexDataSetsQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }
                _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                ComplexDataSetsQueue.Enqueue(_lastItemRef);
                count = 1;
            }
            else if(_lastItemRef == null)
            {
                // First item ever: start the first batch.
                _lastItemRef = new ConcurrentBag<ComplexDataSet>{data};
                ComplexDataSetsQueue.Enqueue(_lastItemRef);
                count = 1;
            }
            else
            {
                // Current batch is not full yet: keep filling it.
                _lastItemRef.Add(data);
                count++;
            }

            if(isfinalcycle)
            {
                // Commit everything that hasn't been committed yet.
                ConcurrentBag<ComplexDataSet> dequeueRef;
                while (ComplexDataSetsQueue.TryDequeue(out dequeueRef))
                {
                    Commit(dequeueRef);
                }
            }
        }
    }

    public void Commit(ConcurrentBag<ComplexDataSet> data)
    {
        // Commit data to database. NOTE(review): this runs inside the lock,
        // serializing all producers for the duration of the write — consider
        // handing completed batches to a dedicated writer task instead.
    }
}
// [1] - Producer: generates complex data sets in parallel and pushes each
// one onto the shared processing queue.
class EnqueueWorker
{
    // Generate items for indices 1..9999 in parallel and enqueue them.
    internal void ParrallelEnqueue(ConcurrentQueue<ComplexDataSet> resultQueue)
    {
        Parallel.For(1, 10000, index =>
        {
            resultQueue.Enqueue(GenerateData(index));
        });
    }

    // Placeholder generator for a single data set.
    ComplexDataSet GenerateData(int i)
    {
        return new ComplexDataSet();
    }
}
//[3] Consumer: takes sets from the processing queue and flushes the buffer
// to the database each time N items have accumulated.
class DequeueWorker
{
    // Buffer of dequeued items awaiting the next flush.
    private static ConcurrentBag<ComplexDataSet> buffer;
    // Serializes flushes so two threads cannot swap the buffer at once.
    private static object syncRoot = new object();

    // Dequeues 10000 items (one per parallel iteration), buffering them and
    // flushing in batches of N.
    // NOTE(review): the spin loop below never terminates if fewer items than
    // expected ever arrive — a BlockingCollection with CompleteAdding() is
    // the robust alternative.
    internal void ParrallelDequeue(ConcurrentQueue<ComplexDataSet> resultQueue)
    {
        buffer = new ConcurrentBag<ComplexDataSet>();
        int N = 100;
        Parallel.For(1, 10000, (i) =>
        {
            // Spin until an item becomes available.
            ComplexDataSet cds = null;
            var spinWait = new SpinWait();
            while (cds == null)
            {
                resultQueue.TryDequeue(out cds);
                spinWait.SpinOnce();
            }
            // Add to buffer.
            buffer.Add(cds);

            // Flush once the buffer holds at least N items. Bug fix: the
            // original tested Count == N, but concurrent Add() calls can make
            // the count jump straight past N so the flush never fires; using
            // >= plus a re-check under the lock avoids both that and
            // redundant double flushes.
            if (buffer.Count >= N)
            {
                lock (syncRoot)
                {
                    if (buffer.Count >= N)
                    {
                        IEnumerable<ComplexDataSet> data = buffer.ToArray();
                        // flush data to database
                        buffer = new ConcurrentBag<ComplexDataSet>();
                    }
                }
            }
        });
    }
}
class ComplexDataSet { }
class Program
{
    // Processing queue - [2]: single shared instance connecting the
    // producer [1] and the consumer [3].
    private static ConcurrentQueue<ComplexDataSet> processingQueue;

    static void Main(string[] args)
    {
        // Create new processing queue - single instance for whole app.
        processingQueue = new ConcurrentQueue<ComplexDataSet>();

        // Enqueue (producer) worker.
        Task enqueueTask = Task.Factory.StartNew(() =>
        {
            EnqueueWorker enqueueWorker = new EnqueueWorker();
            enqueueWorker.ParrallelEnqueue(processingQueue);
        });

        // Dequeue (consumer) worker.
        Task dequeueTask = Task.Factory.StartNew(() =>
        {
            DequeueWorker dequeueWorker = new DequeueWorker();
            dequeueWorker.ParrallelDequeue(processingQueue);
        });

        // Bug fix: the original returned immediately, and thread-pool tasks
        // do not keep the process alive, so the app could exit before any
        // work completed. Wait for both workers to finish.
        Task.WaitAll(enqueueTask, dequeueTask);
    }
}
int total = 10000000;
int step = 1000;
// Partition the index range into total/step chunks; each parallel iteration
// generates one chunk of `step` items and commits it as a single batch.
Parallel.For(0, total / step, (r, state) =>
{
    // Bug fix: the original read "r * start", using the very variable being
    // declared (uninitialized); the chunk offset is r * step.
    int start = r * step;
    int end = start + step;
    ComplexDataSet[] result = new ComplexDataSet[step];
    for (int i = start; i < end; i++)
    {
        result[i - start] = GenerateData(i);
    }
    Commit(result);
});
// Writes one batch of generated data over a fresh SQL connection, which is
// disposed even if the insert throws.
private void Commit(ComplexDataSet[] data)
{
    var connection = new SqlConnection("connection string...");
    try
    {
        connection.Open();
        // insert your data here...
    }
    finally
    {
        connection.Dispose();
    }
}
// Thread-local batching via Parallel.For's localInit/localFinally overload:
// every worker thread receives its own ThreadState from the first lambda, so
// Add() needs no synchronization; the last lambda disposes the state, which
// flushes any partial batch (see ThreadState.Dispose).
Parallel.For(0, 10000000, () => new ThreadState(),
    (i, loopstate, threadstate) =>
    {
        ComplexDataSet data = GenerateData(i);
        threadstate.Add(data);
        return threadstate;
    }, threadstate => threadstate.Dispose());
// Per-thread batching state for the Parallel.For localInit/localFinally
// pattern: each worker thread owns exactly one instance, so Add/Commit run
// without locks.
sealed class ThreadState : IDisposable
{
    // NOTE(review): this is answer pseudo-code — the field is typed
    // IDisposable but called via db.Write(...) below, which IDisposable does
    // not define, and it is never assigned. Replace with the real database
    // connection type before use.
    readonly IDisposable db;
    // Items pending commit; flushed in batches of 100.
    readonly Queue<ComplexDataSet> queue = new Queue<ComplexDataSet>();

    public ThreadState()
    {
        // initialize db with a private MongoDb connection.
    }

    // Buffers one item and commits when the batch reaches 100.
    public void Add(ComplexDataSet cds)
    {
        queue.Enqueue(cds);
        if(queue.Count == 100)
        {
            Commit();
        }
    }

    // Writes the buffered batch and clears it for reuse.
    void Commit()
    {
        db.Write(queue);
        queue.Clear();
    }

    // Flushes any partial batch, then releases the connection even if that
    // final commit throws.
    public void Dispose()
    {
        try
        {
            if(queue.Count > 0)
            {
                Commit();
            }
        }
        finally
        {
            db.Dispose();
        }
    }
}
// Bounded producer/consumer pipeline: Parallel.For produces into a
// BlockingCollection with a capacity of 1000, so Add() blocks (instead of
// exhausting memory) if data is generated faster than it can be committed,
// and a single long-running task drains it in batches of up to 100 per
// database commit.
BlockingCollection<ComplexDataSet> commits =
    new BlockingCollection<ComplexDataSet>(1000);
Task consumer = Task.Factory.StartNew(() =>
{
    // Consumer: keeps draining "commits" until CompleteAdding() has been
    // called AND the collection is empty (that is what IsCompleted means).
    while(!commits.IsCompleted)
    {
        ComplexDataSet cds;
        // Timeout of -1 (infinite) waits for an item or IsCompleted == true.
        if(commits.TryTake(out cds, -1))
        {
            // Got at least one item, write it.
            db.Write(cds);
            // Keep dequeuing until the queue is momentarily empty (TryTake
            // with a zero timeout returns false immediately) or until 100
            // items have been taken for this batch.
            for(int i = 1; i < 100 && commits.TryTake(out cds, 0); ++i)
            {
                db.Write(cds);
            }
            // Commit the batch. Producers can continue adding to the queue
            // while this commit is processing.
            db.Commit();
        }
    }
}, TaskCreationOptions.LongRunning);
try
{
    // Producer: generate items in parallel; Add() blocks while the
    // collection is at capacity.
    Parallel.For(0, 1000000, i =>
    {
        ComplexDataSet data = GenerateData(i);
        commits.Add(data);
    });
}
finally // put in a finally to ensure the task closes down.
{
    commits.CompleteAdding(); // marks commits.IsAddingCompleted = true.
    consumer.Wait(); // wait for task to finish committing all the items.
}