C# 异步/等待和多处理
我有一个txt文件,里面有50000个唯一的域名,一开始我只想打开其中的每个网站。我正在使用异步的HttpClient,并尝试了3种不同的方法来划分任务(标签:c#、performance、asynchronous、dotnet-httpclient)。
1.
IEnumerable lines=File.ReadLines(“File.txt”);
尝试
{
数据集所有数据;
var downloadData=新TransformBlock(
异步行=>
{
HttpClientHandler HttpClientHandler=新的HttpClientHandler();
HttpClient=新的HttpClient(httpClientHandler);
尝试
{
Http响应消息响应消息=
wait client.GetAsync(line).configureWait(false);
返回
wait responseMessage.Content.ReadAsByteArrayAsync().configurewait(false);
}
捕获(例外情况除外)
{
//一网打尽,减少测试代码
返回null;
}
最后
{
联锁增量(参考Finisheduls);
}
},
新的ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism=500,
});
foreach(行中的var行)
下载数据。发布(行);
downloadData.Complete();
等待下载数据。完成;
2.
List allTasks=new List();
SemaphoreSlim throttler=新的SemaphoreSlim(initialCount:DataflowBlockOptions.Unbounded);
foreach(行中的var行)
{
wait throttler.WaitAsync().ConfigureWait(false);
allTasks.Add(Task.Run)(异步()=>
{
尝试
{
HttpClientHandler HttpClientHandler=新的HttpClientHandler();
HttpClient=新的HttpClient(httpClientHandler);
尝试
{
HttpResponseMessage responseMessage=await client.GetAsync(行).ConfigureAwait(假);
var byteArray=await responseMessage.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
}
捕获(例外情况除外)
{
}
联锁增量(参考Finisheduls);
}
捕获(例外情况除外)
{
}
}
最后
{
节流器释放();
}
}));
}
等待任务。WhenAll(所有任务);
3.
等待行。ForEachAsync(500,cancellationToken,async行=>
{
HttpClientHandler HttpClientHandler=新的HttpClientHandler();
HttpClient=新的HttpClient(httpClientHandler);
尝试
{
HttpResponseMessage responseMessage=await client.GetAsync(行).ConfigureAwait(假);
var byteArray=await responseMessage.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
}
捕获(例外情况除外)
{
}
联锁增量(参考Finisheduls);
}
);
公共静态任务ForEachAsync(此IEnumerable源、int dop、CancellationToken CancellationToken、,
(职能机构)
{
返回任务.WhenAll(
来自Partitioner.Create(source).GetPartitions(dop.aspallel()中的分区
选择Task.Run(异步委托
{
使用(分区)
while(partition.MoveNext())
wait body(partition.Current).configurewait(false);
},取消令牌);
}
我用第3种方案得到了最好的速度结果——大约12000个URL/分钟,性能监视器中显示建立了10000个连接,CPU使用率约为1%。
但是,当我把txt文件拆分成5个部分(每个文件10000个URL),并同时运行我的程序的5个实例时,总速度达到了25000个URL/分钟,建立了30000个连接,CPU使用率为3%。我试过调整dop参数,把它从500继续往上增加,但这并没有带来很大的变化。所以我的问题是——如何让单个程序实例也能处理25000个URL/分钟?应该如何划分异步作业,才能获得尽可能高的速度?
单个进程是否存在.NET方面的限制?
该程序正在64位windows server 2012上运行
500Mb网络,64GB RAM,SSD磁盘,E5-1620-v2 CPU
更新1
同时针对不同“dop”和4个实例的速度结果:
我认为你的问题与此有关:
尝试将最大连接数增加到并发运行的任务数(可能是核心数)。另外,去掉外层的任务也许会有帮助。可以写成类似下面这样(未包含完整的异常处理):
List allTasks=new List();
foreach(行中的var行)
{
HttpClientHandler HttpClientHandler=新的HttpClientHandler();
HttpClient=新的HttpClient(httpClientHandler);
尝试
{
allTasks.Add(client.GetAsync(行)。
ContinueWith(t=>t.Result.Content.ReadAsByteArrayAsync(),TaskContinuationOptions.OnlyOnRanToCompletion));
}
抓住
{
}
}
等待任务。WhenAll(所有任务);
可以想象,您可能在等待响应的外层任务中消耗了太多的线程池资源。我不确定调度器会如何处理这种情况,但外层任务在我看来是多余的。请尝试将
System.Net.ServicePointManager.DefaultConnectionLimit
设置为一个非常大的数字,例如int.MaxValue。
这在很大程度上取决于它运行的硬件。你希望我们如何回答这个问题?我写过硬件可以在5个实例中轻松处理25000个URL/分钟摘要我知道你提到过分区,但是你是否将其增加到了5倍?是的,我尝试了2500个分区-甚至5000个分区-速度没有增加多少-只有13000-14000个URL/minWhats与每个域500个连接相比,2500/5000的CPU配置文件是有限的-正如我说的,我有“唯一”域列表OK,我不明白,对不起,为什么停止了?你可能仍然需要控制它。到15000 ur花了多长时间
IEnumerable<string> lines = File.ReadLines("file.txt");
try
{
DataSet allData;
// Approach 1: push every URL from `lines` through a TPL Dataflow block that
// downloads at most 500 URLs concurrently.
//
// One HttpClient is shared by all requests. The original created a new
// HttpClient + HttpClientHandler per URL and never disposed either of them.
HttpClient sharedClient = new HttpClient(new HttpClientHandler());

var downloadData = new TransformBlock<string, byte[]>(
    async line =>
    {
        try
        {
            // Dispose the response so the underlying connection and buffers
            // are released as soon as the body has been read.
            using (HttpResponseMessage responseMessage =
                await sharedClient.GetAsync(line).ConfigureAwait(false))
            {
                return await responseMessage.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
            }
        }
        catch (Exception)
        {
            //catch all to reduce code for testing
            return null;
        }
        finally
        {
            // Count the URL as finished whether the download succeeded or failed.
            Interlocked.Increment(ref finishedUrls);
        }
    },
    new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = 500,
    });

// A TransformBlock does not complete until every output item has been taken
// out of it. Nothing consumed the results before, so awaiting Completion below
// would never finish. The downloaded bytes are not used here, so they are
// drained into a null target; link a real consumer instead if they are needed.
downloadData.LinkTo(DataflowBlock.NullTarget<byte[]>());

foreach (var line in lines)
{
    downloadData.Post(line);
}

downloadData.Complete();
await downloadData.Completion;

// All requests have finished; release the shared client and its handler.
sharedClient.Dispose();
// Approach 2: start one task per URL in `lines`, using a semaphore to cap the
// number of downloads that are in flight at the same time.
//
// The original passed DataflowBlockOptions.Unbounded (-1) as initialCount, which
// makes the SemaphoreSlim constructor throw ArgumentOutOfRangeException. The cap
// below matches the degree of parallelism (500) used by the other approaches.
const int maxConcurrentDownloads = 500;

List<Task> allTasks = new List<Task>();
SemaphoreSlim throttler = new SemaphoreSlim(initialCount: maxConcurrentDownloads);

// One client for all requests instead of an undisposed client + handler per URL.
HttpClient throttledClient = new HttpClient(new HttpClientHandler());

foreach (var line in lines)
{
    // Wait for a free slot before starting the next download.
    await throttler.WaitAsync().ConfigureAwait(false);
    allTasks.Add(Task.Run(async () =>
    {
        try
        {
            using (HttpResponseMessage responseMessage =
                await throttledClient.GetAsync(line).ConfigureAwait(false))
            {
                // The body is read in full and then discarded.
                await responseMessage.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
            }
        }
        catch (Exception)
        {
            // Best effort: a failed download is counted as finished and skipped.
        }
        finally
        {
            Interlocked.Increment(ref finishedUrls);
            // Always give the slot back, otherwise the loop above would stall.
            throttler.Release();
        }
    }));
}

await Task.WhenAll(allTasks);

// All requests have finished; release the shared client and the semaphore.
throttledClient.Dispose();
throttler.Dispose();
// Approach 3: split `lines` into 500 partitions with the ForEachAsync extension
// and download the URLs of each partition one after another.
//
// One client is shared by all partitions. The original created a new HttpClient +
// HttpClientHandler per URL and never disposed them or the responses.
HttpClient partitionClient = new HttpClient(new HttpClientHandler());

await lines.ForEachAsync(500, cancellationToken, async line =>
{
    try
    {
        using (HttpResponseMessage responseMessage =
            await partitionClient.GetAsync(line).ConfigureAwait(false))
        {
            // The body is read in full and then discarded.
            await responseMessage.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
        }
    }
    catch (Exception)
    {
        // Best effort: a failed download is counted as finished and skipped.
    }

    Interlocked.Increment(ref finishedUrls);
});

// All partitions have finished; release the shared client and its handler.
partitionClient.Dispose();
/// <summary>
/// Runs <paramref name="body"/> for every element of <paramref name="source"/>,
/// splitting the sequence into <paramref name="dop"/> partitions that are
/// processed by separate tasks.
/// </summary>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="source">Sequence whose elements are handed to <paramref name="body"/>.</param>
/// <param name="dop">Number of partitions requested from the partitioner.</param>
/// <param name="cancellationToken">
/// Token handed to <c>Task.Run</c> for each partition task; it is not checked
/// inside the per-element loop.
/// </param>
/// <param name="body">Asynchronous action invoked once per element.</param>
/// <returns>A task that completes when all partition tasks have completed.</returns>
public static Task ForEachAsync<T>(this IEnumerable<T> source, int dop, CancellationToken cancellationToken,
    Func<T, Task> body)
{
    IList<IEnumerator<T>> partitions = Partitioner.Create(source).GetPartitions(dop);

    IEnumerable<Task> workers = partitions
        .AsParallel()
        .Select(partition => Task.Run(async () =>
        {
            // Each partition enumerator is drained sequentially and disposed afterwards.
            using (partition)
            {
                while (partition.MoveNext())
                {
                    await body(partition.Current).ConfigureAwait(false);
                }
            }
        }, cancellationToken));

    return Task.WhenAll(workers);
}
// Suggested variant without an outer Task.Run: start the download of every URL
// in `lines` directly and wait for all of them.
//
// Fixes compared to the ContinueWith version:
//  * ContinueWith(t => ...ReadAsByteArrayAsync()) produced a Task<Task<byte[]>>
//    that was never unwrapped, so WhenAll did not wait for the body to be read.
//  * With OnlyOnRanToCompletion a failed request cancelled its continuation and
//    made WhenAll throw; failures are now handled per URL (best effort).
//  * No blocking t.Result, and client/response objects are disposed.
List<Task> allTasks = new List<Task>();
HttpClient client = new HttpClient(new HttpClientHandler());

// Downloads one URL; returns the body, or null if the request failed.
Func<string, Task<byte[]>> downloadAsync = async url =>
{
    try
    {
        using (HttpResponseMessage response = await client.GetAsync(url).ConfigureAwait(false))
        {
            return await response.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
        }
    }
    catch (Exception)
    {
        // Best effort: a failed download yields null instead of failing the whole batch.
        return null;
    }
};

// NOTE(review): as in the original, nothing limits how many requests are started at once.
foreach (var line in lines)
{
    allTasks.Add(downloadAsync(line));
}

await Task.WhenAll(allTasks);

// All requests have finished; release the shared client and its handler.
client.Dispose();