C# 限制异步任务
我想运行一组异步任务,并限制在任何给定时间有多少任务可以处于等待完成的状态。
假设你有1000个URL,而你一次只想打开50个请求;但一旦某个请求完成,就立即打开到列表中下一个URL的连接。这样,在URL列表用尽之前,任何时刻打开的连接总是恰好有50个。
如果可能的话,我还想只使用给定数量的线程。
标签:c#, async-await, semaphore, throttling, tpl-dataflow
我提出了一个扩展方法 ThrottleTasksAsync,它满足了我的需要。有没有更简单的解决方案?我认为这是一种常见的情况。
用法:
/// <summary>
/// Console demo: pushes the numbers 1..10 through ThrottleTasksAsync with at most
/// 5 tasks in flight and a degree of parallelism of 2, then waits for a key press.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var numbers = Enumerable.Range(1, 10);

        // Each work item just echoes its input to the console and returns it.
        var throttled = numbers.ThrottleTasksAsync(5, 2, async i =>
        {
            Console.WriteLine(i);
            return i;
        });

        // Main is synchronous, so block here until every item has been processed.
        throttled.Wait();

        Console.WriteLine("Press a key to exit...");
        Console.ReadKey(true);
    }
}
代码如下:
/// <summary>
/// Extension methods for running asynchronous work over a sequence with throttling.
/// </summary>
static class IEnumerableExtensions
{
    /// <summary>
    /// Runs <paramref name="taskToRun"/> for every element of <paramref name="enumerable"/>,
    /// keeping at most <paramref name="maxConcurrentTasks"/> tasks pending at any time and
    /// invoking <paramref name="taskToRun"/> from at most <paramref name="maxDegreeOfParallelism"/>
    /// concurrent pump loops.
    /// </summary>
    /// <remarks>
    /// Differences from the previous implementation, all of them fixes:
    /// the task list is no longer mutated concurrently without synchronization;
    /// no thread is blocked waiting on the semaphore or polling a queue;
    /// the artificial two-second sleep after each task is gone;
    /// a failing task surfaces its own exception instead of an AggregateException wrapper;
    /// results are returned in input order.
    /// </remarks>
    /// <param name="enumerable">Items to process. Enumerated lazily, exactly once.</param>
    /// <param name="maxConcurrentTasks">Maximum number of tasks pending at once; must be positive.</param>
    /// <param name="maxDegreeOfParallelism">
    /// Maximum number of concurrent invocations of <paramref name="taskToRun"/>; positive, or -1 for
    /// "no additional limit" (the same convention as ParallelOptions.MaxDegreeOfParallelism).
    /// </param>
    /// <param name="taskToRun">Asynchronous operation to start for each item.</param>
    /// <returns>The results of all tasks, in the order of the input items.</returns>
    /// <exception cref="ArgumentNullException">A reference argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">A limit is zero or otherwise out of range.</exception>
    public static async Task<Result_T[]> ThrottleTasksAsync<Enumerable_T, Result_T>(this IEnumerable<Enumerable_T> enumerable, int maxConcurrentTasks, int maxDegreeOfParallelism, Func<Enumerable_T, Task<Result_T>> taskToRun)
    {
        if (enumerable == null) throw new ArgumentNullException("enumerable");
        if (taskToRun == null) throw new ArgumentNullException("taskToRun");
        if (maxConcurrentTasks <= 0) throw new ArgumentOutOfRangeException("maxConcurrentTasks");
        if (maxDegreeOfParallelism == 0 || maxDegreeOfParallelism < -1) throw new ArgumentOutOfRangeException("maxDegreeOfParallelism");

        // Every pump needs a free slot before it can start work, so more pumps than
        // slots would only sit idle.
        int pumpCount = maxDegreeOfParallelism == -1
            ? maxConcurrentTasks
            : Math.Min(maxDegreeOfParallelism, maxConcurrentTasks);

        var gate = new object();                    // guards source, exhausted and started
        var started = new List<Task<Result_T>>();   // index == position of the item in the input
        bool exhausted = false;

        using (var throttle = new SemaphoreSlim(maxConcurrentTasks))
        using (var source = enumerable.GetEnumerator())
        {
            Func<Task> pump = async () =>
            {
                while (true)
                {
                    // Asynchronously wait for a free slot; no thread is blocked here.
                    await throttle.WaitAsync().ConfigureAwait(false);

                    Enumerable_T item = default(Enumerable_T);
                    int slot = -1;
                    bool claimed = false;
                    try
                    {
                        lock (gate)
                        {
                            if (!exhausted)
                            {
                                // Stays set if MoveNext/Current throws, so the other pumps stop too.
                                exhausted = true;
                                if (source.MoveNext())
                                {
                                    item = source.Current;
                                    slot = started.Count;
                                    started.Add(null);   // reserve the position to keep input order
                                    claimed = true;
                                    exhausted = false;
                                }
                            }
                        }
                    }
                    finally
                    {
                        // No item was taken: hand the unused slot back.
                        if (!claimed) throttle.Release();
                    }

                    if (!claimed) return;

                    // Starts the work but does not await it, so this pump can go on to
                    // start the next item while this one is still pending.
                    Task<Result_T> running = RunThenReleaseAsync(taskToRun, item, throttle);
                    lock (gate)
                    {
                        started[slot] = running;
                    }
                }
            };

            var pumps = new Task[pumpCount];
            for (int i = 0; i < pumps.Length; i++)
            {
                pumps[i] = Task.Run(pump);
            }

            Exception pumpFailure = null;
            try
            {
                await Task.WhenAll(pumps).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                pumpFailure = ex;   // enumerating the source failed
            }

            if (pumpFailure != null)
            {
                // Let in-flight work finish before the semaphore is disposed; the
                // enumeration failure is the error reported to the caller.
                try
                {
                    await Task.WhenAll(started).ConfigureAwait(false);
                }
                catch (Exception)
                {
                    // Deliberately ignored: pumpFailure takes precedence.
                }

                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(pumpFailure).Throw();
            }

            return await Task.WhenAll(started).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Invokes <paramref name="taskToRun"/> for one item and releases the throttle slot once the
    /// work has finished, whether it succeeded, failed or threw synchronously.
    /// </summary>
    private static async Task<TResult> RunThenReleaseAsync<TItem, TResult>(Func<TItem, Task<TResult>> taskToRun, TItem item, SemaphoreSlim throttle)
    {
        try
        {
            return await taskToRun(item).ConfigureAwait(false);
        }
        finally
        {
            throttle.Release();
        }
    }
}
但是,线程池很快就会耗尽,而且在其中无法使用 async/await。
奖金:
BlockingCollection 在调用 CompleteAdding() 之后,Take() 会抛出异常。为了绕开这个问题,我使用了带超时的 TryTake 重载。如果不给 TryTake 指定超时,使用 BlockingCollection 就失去了意义,因为那样 TryTake 不会阻塞。有更好的办法吗?理想情况下,应该有一个 TakeAsync 方法。
如前面所建议的,请使用 TPL Dataflow。
TransformBlock<TInput, TOutput> 可能就是你要找的。
您可以设置 MaxDegreeOfParallelism,以限制可以并行转换多少个字符串(即可以同时下载多少个URL)。然后将URL发布到该块中;全部发布完毕后,通知该块已完成添加,再取回响应。
// Transform each URL into a response, running at most 50 transforms at once.
var parallelism = new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 50 };
var downloader = new TransformBlock<string, HttpResponse>(url => Download(url), parallelism);

// Forward every response into a dedicated buffer so the downloader's output gets consumed.
var buffer = new BufferBlock<HttpResponse>();
downloader.LinkTo(buffer);

// Queue all URLs.
foreach (var url in urls)
{
    downloader.Post(url);
    //or await downloader.SendAsync(url);
}

// Signal that no more input is coming, then wait until everything has been processed.
downloader.Complete();
await downloader.Completion;

// Collect whatever the buffer holds.
IList<HttpResponse> responses;
bool gotResponses = buffer.TryReceiveAll(out responses);
if (gotResponses)
{
    //process responses
}
var downloader = new TransformBlock<string, HttpResponse>(
url => Download(url),
new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 50 }
);
var buffer = new BufferBlock<HttpResponse>();
downloader.LinkTo(buffer);
foreach(var url in urls)
downloader.Post(url);
//or await downloader.SendAsync(url);
downloader.Complete();
await downloader.Completion;
IList<HttpResponse> responses;
if (buffer.TryReceiveAll(out responses))
{
//process responses
}
注意:TransformBlock 会缓冲其输入和输出。那么,为什么还需要把它链接到一个 BufferBlock 呢?
因为 TransformBlock 在其所有输出项(HttpResponse)都被消费之前不会完成,所以 await downloader.Completion 会一直挂起。因此,我们让 downloader 把所有输出转发到一个专用的 BufferBlock,然后等待 downloader 完成,再检查这个 BufferBlock。
假设您有1000个URL,而您一次只想打开50个请求;但一旦某个请求完成,您就打开到列表中下一个URL的连接。这样,在URL列表用尽之前,任何时刻总是恰好有50个打开的连接。
下面这个简单的解决方案在这里多次出现。它不使用阻塞代码,也不显式创建线程,因此它的扩展性非常好:
const int MAX_DOWNLOADS = 50;
/// <summary>
/// Downloads every URL as a string and writes it to the console, allowing at most
/// MAX_DOWNLOADS requests to be in progress at the same time.
/// </summary>
/// <param name="urls">Addresses to download.</param>
static async Task DownloadAsync(string[] urls)
{
    using (var throttle = new SemaphoreSlim(MAX_DOWNLOADS))
    using (var client = new HttpClient())
    {
        // One throttled download: take a slot, fetch, print, always give the slot back.
        Func<string, Task> fetchOne = async address =>
        {
            await throttle.WaitAsync();
            try
            {
                var body = await client.GetStringAsync(address);
                Console.WriteLine(body);
            }
            finally
            {
                throttle.Release();
            }
        };

        // Task.WhenAll enumerates the lazy projection, which starts all downloads.
        await Task.WhenAll(urls.Select(fetchOne));
    }
}
问题是,下载数据的处理应该在不同的管道上完成,具有不同的并行级别,特别是当它是CPU限制的处理时
例如,您可能希望有4个线程同时执行数据处理(CPU内核的数量),以及多达50个挂起的更多数据请求(根本不使用线程)。AFAICT,这不是您的代码当前正在做的事情
这就是TPL Dataflow或Rx作为首选解决方案的用武之地。不过,仅用普通的TPL(任务并行库)来实现这样的东西当然也是可能的。注意,这里唯一的阻塞代码是在 Task.Run 内部执行实际数据处理的那部分代码:
const int MAX_DOWNLOADS = 50;
const int MAX_PROCESSORS = 4;
// process data
/// <summary>
/// Processing stage: runs queued items on the thread pool, at most MAX_PROCESSORS at a time,
/// and tracks the outstanding work so callers can wait for it.
/// </summary>
class Processing
{
    readonly SemaphoreSlim _semaphore = new SemaphoreSlim(MAX_PROCESSORS);
    // Tasks that are still running, or that ended faulted/cancelled.
    readonly HashSet<Task> _pending = new HashSet<Task>();
    readonly object _lock = new Object();

    /// <summary>Processes one item once a processing slot is free.</summary>
    async Task ProcessAsync(string data)
    {
        await _semaphore.WaitAsync();
        try
        {
            await Task.Run(() =>
            {
                // simulate work
                Thread.Sleep(1000);
                Console.WriteLine(data);
            });
        }
        finally
        {
            _semaphore.Release();
        }
    }

    /// <summary>
    /// Queues an item for processing and returns immediately. The work is tracked until it
    /// completes successfully.
    /// </summary>
    /// <remarks>
    /// This used to be an <c>async void</c> method. It is now a plain method with a
    /// continuation, so no exception can escape onto the caller's context. Failures stay in
    /// the pending set and are reported by <see cref="WaitForCompleteAsync"/>.
    /// </remarks>
    public void QueueItemAsync(string data)
    {
        var task = ProcessAsync(data);
        lock (_lock)
        {
            _pending.Add(task);
        }

        // Remove only successfully completed tasks; faulted/cancelled ones stay in the set
        // so that their errors are observed later.
        task.ContinueWith(
            completed =>
            {
                lock (_lock)
                {
                    _pending.Remove(completed);
                }
            },
            CancellationToken.None,
            TaskContinuationOptions.OnlyOnRanToCompletion | TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);
    }

    /// <summary>
    /// Waits for every task that is pending at the time of the call; throws if any of them
    /// faulted or was cancelled.
    /// </summary>
    public async Task WaitForCompleteAsync()
    {
        Task[] tasks;
        lock (_lock)
        {
            tasks = _pending.ToArray();
        }
        await Task.WhenAll(tasks);
    }
}
// download data
/// <summary>
/// Downloads every URL with at most MAX_DOWNLOADS requests in progress, hands each result
/// to a Processing pipeline, and completes once both downloading and processing are done.
/// </summary>
/// <param name="urls">Addresses to download.</param>
static async Task DownloadAsync(string[] urls)
{
    var pipeline = new Processing();
    using (var throttle = new SemaphoreSlim(MAX_DOWNLOADS))
    using (var client = new HttpClient())
    {
        // One throttled download whose result is queued for processing.
        Func<string, Task> fetchAndQueue = async address =>
        {
            await throttle.WaitAsync();
            try
            {
                var body = await client.GetStringAsync(address);
                // put the result on the processing pipeline
                pipeline.QueueItemAsync(body);
            }
            finally
            {
                throttle.Release();
            }
        };

        // Materialising the projection starts all downloads.
        Task[] downloads = urls.Select(fetchAndQueue).ToArray();
        await Task.WhenAll(downloads);

        // Downloads are finished; now wait for the queued processing work.
        await pipeline.WaitForCompleteAsync();
    }
}
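const int MAX_DOWNLOADS = 50;
const int MAX_PROCESSORS = 4;
// process data
class Processing
{
SemaphoreSlim _semaphore = new SemaphoreSlim(MAX_PROCESSORS);
HashSet<Task> _pending = new HashSet<Task>();
object _lock = new Object();
async Task ProcessAsync(string data)
{
await _semaphore.WaitAsync();
try
{
await Task.Run(() =>
{
// simulate work
Thread.Sleep(1000);
Console.WriteLine(data);
});
}
finally
{
_semaphore.Release();
}
}
public async void QueueItemAsync(string data)
{
var task = ProcessAsync(data);
lock (_lock)
_pending.Add(task);
try
{
await task;
}
catch
{
if (!task.IsCanceled && !task.IsFaulted)
throw; // not the task's exception, rethrow
// don't remove faulted/cancelled tasks from the list
return;
}
// remove successfully completed tasks from the list
lock (_lock)
_pending.Remove(task);
}
public async Task WaitForCompleteAsync()
{
Task[] tasks;
lock (_lock)
tasks = _pending.ToArray();
await Task.WhenAll(tasks);
}
}
// download data
static async Task DownloadAsync(string[] urls)
{
var processing = new Processing();
using (var semaphore = new SemaphoreSlim(MAX_DOWNLOADS))
using (var httpClient = new HttpClient())
{
var tasks = urls.Select(async (url) =>
{
await semaphore.WaitAsync();
try
{
var data = await httpClient.GetStringAsync(url);
// put the result on the processing pipeline
processing.QueueItemAsync(data);
}
finally
{
semaphore.Release();
}
});
await Task.WhenAll(tasks.ToArray());
await processing.WaitForCompleteAsync();
}
}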
根据要求,以下是我最终使用的代码
工作在主-详细配置中设置,每个主配置都是p
const int MAX_DOWNLOADS = 50;
const int MAX_PROCESSORS = 4;
// process data
/// <summary>
/// Processing stage: runs queued items on the thread pool, at most MAX_PROCESSORS at a time,
/// and tracks the outstanding work so callers can wait for it.
/// </summary>
class Processing
{
    readonly SemaphoreSlim _semaphore = new SemaphoreSlim(MAX_PROCESSORS);
    // Tasks that are still running, or that ended faulted/cancelled.
    readonly HashSet<Task> _pending = new HashSet<Task>();
    readonly object _lock = new Object();

    /// <summary>Processes one item once a processing slot is free.</summary>
    async Task ProcessAsync(string data)
    {
        await _semaphore.WaitAsync();
        try
        {
            await Task.Run(() =>
            {
                // simulate work
                Thread.Sleep(1000);
                Console.WriteLine(data);
            });
        }
        finally
        {
            _semaphore.Release();
        }
    }

    /// <summary>
    /// Queues an item for processing and returns immediately. The work is tracked until it
    /// completes successfully.
    /// </summary>
    /// <remarks>
    /// This used to be an <c>async void</c> method. It is now a plain method with a
    /// continuation, so no exception can escape onto the caller's context. Failures stay in
    /// the pending set and are reported by <see cref="WaitForCompleteAsync"/>.
    /// </remarks>
    public void QueueItemAsync(string data)
    {
        var task = ProcessAsync(data);
        lock (_lock)
        {
            _pending.Add(task);
        }

        // Remove only successfully completed tasks; faulted/cancelled ones stay in the set
        // so that their errors are observed later.
        task.ContinueWith(
            completed =>
            {
                lock (_lock)
                {
                    _pending.Remove(completed);
                }
            },
            CancellationToken.None,
            TaskContinuationOptions.OnlyOnRanToCompletion | TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);
    }

    /// <summary>
    /// Waits for every task that is pending at the time of the call; throws if any of them
    /// faulted or was cancelled.
    /// </summary>
    public async Task WaitForCompleteAsync()
    {
        Task[] tasks;
        lock (_lock)
        {
            tasks = _pending.ToArray();
        }
        await Task.WhenAll(tasks);
    }
}
// download data
/// <summary>
/// Downloads every URL with at most MAX_DOWNLOADS requests in progress, hands each result
/// to a Processing pipeline, and completes once both downloading and processing are done.
/// </summary>
/// <param name="urls">Addresses to download.</param>
static async Task DownloadAsync(string[] urls)
{
    var pipeline = new Processing();
    using (var throttle = new SemaphoreSlim(MAX_DOWNLOADS))
    using (var client = new HttpClient())
    {
        // One throttled download whose result is queued for processing.
        Func<string, Task> fetchAndQueue = async address =>
        {
            await throttle.WaitAsync();
            try
            {
                var body = await client.GetStringAsync(address);
                // put the result on the processing pipeline
                pipeline.QueueItemAsync(body);
            }
            finally
            {
                throttle.Release();
            }
        };

        // Materialising the projection starts all downloads.
        Task[] downloads = urls.Select(fetchAndQueue).ToArray();
        await Task.WhenAll(downloads);

        // Downloads are finished; now wait for the queued processing work.
        await pipeline.WaitForCompleteAsync();
    }
}
// Overall outcome flag; starts out true and is returned at the end of this snippet.
var success = true;

// Claim master records one at a time and feed them into the dataflow network
// until ClaimRecordsAsync yields null.
while (true)
{
    Master master = await StoredProcedures.ClaimRecordsAsync(...);
    if (null != master)
    {
        await masterBuffer.SendAsync(master);
    }
    else
    {
        break;
    }
}

// No more master records will be sent.
masterBuffer.Complete();

// Wait until the batch action block has completed.
await batchAction.Completion;

return success;
// The dataflow network.
// Static links set up at the bottom: masterBuffer -> masterTransform -> detailTransform.
// For every master record, masterTransform additionally creates a BatchedJoinBlock and links
// detailTransform -> that join block -> batchAction.
// The blocks are declared up front (as null) because the lambdas below refer to blocks
// that are only assigned later.
BufferBlock<Master> masterBuffer = null;
TransformManyBlock<Master, Detail> masterTransform = null;
TransformBlock<Detail, object> detailTransform = null;
ActionBlock<Tuple<IList<object>, IList<object>>> batchAction = null;
// Buffer master records to enable efficient throttling.
masterBuffer = new BufferBlock<Master>(new DataflowBlockOptions { BoundedCapacity = 1 });
// Sequentially transform master records into a stream of detail records.
masterTransform = new TransformManyBlock<Master, Detail>(async masterRecord =>
{
var records = await StoredProcedures.GetObjectsAsync(masterRecord);
// Filter the master records based on some criteria here
// (no filtering is applied in the code shown: filteredRecords is the same sequence as records)
var filteredRecords = records;
// Only propagate completion to the last batch
// (i.e. when masterBuffer has already completed and masterTransform has no queued input left)
var propagateCompletion = masterBuffer.Completion.IsCompleted && masterTransform.InputCount == 0;
// Create a batch join block to encapsulate the results of the master record.
// NOTE(review): the batch size is records.Count(), but filteredRecords is what gets returned;
// if real filtering is added above the two counts would differ - confirm which one is intended.
var batchjoinblock = new BatchedJoinBlock<object, object>(records.Count(), new GroupingDataflowBlockOptions { MaxNumberOfGroups = 1 });
// Add the batch block to the detail transform pipeline's link queue, and link the batch block to the batch action block.
// Results that are Detail objects go to Target1; results that are Exception objects go to Target2.
var detailLink1 = detailTransform.LinkTo(batchjoinblock.Target1, detailResult => detailResult is Detail);
var detailLink2 = detailTransform.LinkTo(batchjoinblock.Target2, detailResult => detailResult is Exception);
var batchLink = batchjoinblock.LinkTo(batchAction, new DataflowLinkOptions { PropagateCompletion = propagateCompletion });
// Unlink batchjoinblock upon completion.
// (the returned task does not need to be awaited, despite the warning.)
batchjoinblock.Completion.ContinueWith(task =>
{
detailLink1.Dispose();
detailLink2.Dispose();
batchLink.Dispose();
});
return filteredRecords;
}, new ExecutionDataflowBlockOptions { BoundedCapacity = 1 });
// Process each detail record asynchronously, 150 at a time.
// The block's output type is object because a failure is returned as the caught Exception
// instead of being thrown.
detailTransform = new TransformBlock<Detail, object>(async detail => {
try
{
// Perform the action for each detail here asynchronously
await DoSomethingAsync();
return detail;
}
catch (Exception e)
{
// 'success' is not declared in this snippet - presumably the result flag of the
// surrounding method; verify against the caller.
success = false;
return e;
}
}, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 150, BoundedCapacity = 300 });
// Perform the proper action for each batch
// Item1 holds the results routed to Target1 (details), Item2 those routed to Target2 (exceptions).
batchAction = new ActionBlock<Tuple<IList<object>, IList<object>>>(async batch =>
{
var details = batch.Item1.Cast<Detail>();
var errors = batch.Item2.Cast<Exception>();
// Do something with the batch here
}, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 4 });
// Wire the static part of the pipeline; completion flows from masterBuffer down to detailTransform.
masterBuffer.LinkTo(masterTransform, new DataflowLinkOptions { PropagateCompletion = true });
masterTransform.LinkTo(detailTransform, new DataflowLinkOptions { PropagateCompletion = true });