C#异步WebRequests:在所有请求完成时执行操作
我在C#中有一个基本的抓取控制台应用程序,它使用WebRequest以异步方式从站点列表中获取html。它工作得很好,但是我如何设置一个触发器,在列表中的每个站点都处理完毕时触发呢? 我花了几个小时在网上研究各种解决方案,包括MS文档,但没有一个通过代码提供直接的答案。我已经读过关于IAsyncResult.AsyncWaitHandle的文章,但是我不知道如何将它集成到我的代码中。我只想在所有线程完成处理或超时时调用自定义函数。 棘手之处在于,我永远不会提前知道我的列表中有多少站点(它是用户定义的),因此我需要一个足够健壮的解决方案,无论需要等待完成的事件是5个还是100000个都能应对。 谢谢。工作代码如下:C#异步WebRequests:在所有请求完成时执行操作,c#,asynchronous,webrequest,waithandle,C#,Asynchronous,Webrequest,Waithandle,我在C#中有一个基本的抓取控制台应用程序,它使用WebRequest以异步方式从站点列表中获取html。它工作得很好,但是我如何设置一个触发器,在列表中的每个站点都处理完毕时触发呢? 我花了几个小时在网上研究各种解决方案,包括MS文档,但没有一个通过代码提供直接的答案。我已经读过关于IAsyncResult.AsyncWaitHandle的文章,但是我不知道如何将它集成到我的代码中。我只想在所有线程完成处理或超时时调用自定义函数。 棘手之处在于,我永远不会提前知道我的列表中有多少站点(它是用户定义的),
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using System.Threading;
namespace AsyncApp_01
{
    /// <summary>
    /// Console scraper: starts one asynchronous GET per site, prints each page
    /// body to the console, and aborts any request that has not produced a
    /// response within <see cref="RequestTimeoutMilliseconds"/>.
    /// </summary>
    class Program
    {
        /// <summary>Per-request timeout passed to the thread-pool wait registration.</summary>
        private const int RequestTimeoutMilliseconds = 30 * 1000;

        /// <summary>
        /// Entry point: builds the site list, starts the scan, and blocks on
        /// <c>Console.Read()</c> so the process stays alive while callbacks run.
        /// </summary>
        static void Main(string[] args)
        {
            ArrayList alSites = new ArrayList();
            alSites.Add("http://www.google.com");
            alSites.Add("http://www.lostspires.com");
            ScanSites(alSites);
            Console.Read();
        }

        /// <summary>
        /// Starts an asynchronous GET for every entry in <paramref name="sites"/>
        /// and registers a timeout watcher for each one.
        /// </summary>
        /// <param name="sites">List of URI strings to request.</param>
        private static void ScanSites(ArrayList sites)
        {
            foreach (string uriString in sites)
            {
                WebRequest request = WebRequest.Create(uriString);
                request.Method = "GET";
                object data = new object(); //container for our "Stuff"
                // RequestState is a custom class to pass info to the callback
                RequestState state = new RequestState(request, data, uriString);
                IAsyncResult result = request.BeginGetResponse(new AsyncCallback(UpdateItem), state);
                // Register the timeout callback; it runs once, either when the
                // request's wait handle is signalled or when the timeout elapses.
                ThreadPool.RegisterWaitForSingleObject(
                    result.AsyncWaitHandle,
                    new WaitOrTimerCallback(ScanTimeoutCallback),
                    state,
                    RequestTimeoutMilliseconds,
                    true);
            }
        }

        /// <summary>
        /// Completion callback for <c>BeginGetResponse</c>: reads the whole
        /// response body and writes it to the console.
        /// </summary>
        /// <param name="result">Async result whose state is the <see cref="RequestState"/>.</param>
        private static void UpdateItem(IAsyncResult result)
        {
            // grab the custom state object
            RequestState state = (RequestState)result.AsyncState;
            WebRequest request = state.Request;
            try
            {
                // using-blocks release the connection even if reading fails.
                using (WebResponse response = request.EndGetResponse(result))
                using (Stream s = response.GetResponseStream())
                using (StreamReader readStream = new StreamReader(s))
                {
                    // dataString will hold the entire contents of the requested page if we need it.
                    string dataString = readStream.ReadToEnd();
                    Console.WriteLine(dataString);
                }
            }
            catch (WebException ex)
            {
                // EndGetResponse throws here for failed requests and for requests
                // aborted by ScanTimeoutCallback. Left unhandled on a thread-pool
                // thread, the exception would terminate the process.
                Console.WriteLine("request failed- " + state.SiteUrl + ": " + ex.Message);
            }
            catch (IOException ex)
            {
                // The connection dropped while the body was being read.
                Console.WriteLine("read failed- " + state.SiteUrl + ": " + ex.Message);
            }
        }

        /// <summary>
        /// Wait callback registered per request; aborts the request when the
        /// wait ended because of the timeout rather than a completed response.
        /// </summary>
        /// <param name="state">The <see cref="RequestState"/> registered with the wait.</param>
        /// <param name="timedOut">True when the timeout elapsed before the handle was signalled.</param>
        private static void ScanTimeoutCallback(object state, bool timedOut)
        {
            if (timedOut)
            {
                RequestState reqState = (RequestState)state;
                if (reqState != null)
                {
                    reqState.Request.Abort();
                }
                Console.WriteLine("aborted- timeout");
            }
        }

        /// <summary>State object handed to the async callbacks for one request.</summary>
        class RequestState
        {
            public WebRequest Request; // holds the request
            public object Data; // store any data in this
            public string SiteUrl; // holds the UrlString to match up results (Database lookup, etc).

            public RequestState(WebRequest request, object data, string siteUrl)
            {
                this.Request = request;
                this.Data = data;
                this.SiteUrl = siteUrl;
            }
        }
    }
}
任何人只要能告诉我如何限制并发线程的数量,就可以获得额外的积分。例如,如果我有100个站点要处理,我如何设置它以便一次只处理10个站点,而不是更多?我不想打开100个线程。这是我拼凑的一个快速示例。我删除了WebClient实现,因为您似乎正在使用WebRequest。我也在使用.NET 4的ConcurrentBag:
/// <summary>
/// Downloads a set of sites with asynchronous web requests, raising
/// <see cref="SiteScraped"/> for each successful download and
/// <see cref="ScrapingComplete"/> exactly once after every request has finished.
/// </summary>
public class Scraper
{
    private readonly List<string> _sites;
    private readonly ConcurrentBag<string> _data;
    // Number of finished requests; only touched through Interlocked.
    private int _count;
    private readonly int _total;

    /// <summary>Creates a scraper for the given site URIs.</summary>
    /// <param name="sites">URI strings to download.</param>
    public Scraper(IEnumerable<string> sites)
    {
        // Snapshot once so a lazy sequence is not enumerated twice and the
        // total always matches the number of requests actually started.
        _sites = sites.ToList();
        _data = new ConcurrentBag<string>();
        _total = _sites.Count;
    }

    /// <summary>Starts one asynchronous request per site.</summary>
    public void Start()
    {
        if (_total == 0)
        {
            // Nothing will ever call back, so report completion right away.
            OnScrapingComplete();
            return;
        }

        foreach (var site in _sites)
        {
            ScrapeSite(site);
        }
    }

    // Begins the asynchronous request for a single site.
    private void ScrapeSite(string site)
    {
        var req = WebRequest.Create(site);
        req.BeginGetResponse(AsyncCallback, req);
    }

    // Completion callback: stores the page, then counts the request as done.
    private void AsyncCallback(IAsyncResult ar)
    {
        var req = (WebRequest)ar.AsyncState;
        try
        {
            string data;
            if (TryReadResponse(req, ar, out data))
            {
                this.OnSiteScraped(req.RequestUri.AbsoluteUri, data);
                _data.Add(data);
            }
        }
        finally
        {
            // Count only after the data is stored, and compare the value returned
            // by Increment: exactly one callback observes the final total.
            if (Interlocked.Increment(ref _count) == _total)
            {
                OnScrapingComplete();
            }
        }
    }

    // Reads the full response body; returns false when the request or the read failed.
    private static bool TryReadResponse(WebRequest req, IAsyncResult ar, out string data)
    {
        try
        {
            using (var response = req.EndGetResponse(ar))
            using (var reader = new StreamReader(response.GetResponseStream()))
            {
                data = reader.ReadToEnd();
                return true;
            }
        }
        catch (WebException)
        {
            // A failed site contributes no data but must still be counted,
            // otherwise ScrapingComplete would never fire.
        }
        catch (IOException)
        {
            // Connection dropped while reading the body; treated like a failed site.
        }

        data = null;
        return false;
    }

    // Raises SiteScraped if anyone is subscribed.
    private void OnSiteScraped(string site, string data)
    {
        var handler = this.SiteScraped;
        if (handler != null)
        {
            handler(this, new SiteScrapedEventArgs(site, data));
        }
    }

    // Raises ScrapingComplete if anyone is subscribed.
    private void OnScrapingComplete()
    {
        var handler = this.ScrapingComplete;
        if (handler != null)
        {
            handler(this, new ScrapingCompletedEventArgs(_data));
        }
    }

    /// <summary>Raised on a callback thread each time one site has been downloaded.</summary>
    public event EventHandler<SiteScrapedEventArgs> SiteScraped;

    /// <summary>Raised once when every request has finished, successfully or not.</summary>
    public event EventHandler<ScrapingCompletedEventArgs> ScrapingComplete;
}
/// <summary>Event data carrying one site identifier and the text downloaded for it.</summary>
public class SiteScrapedEventArgs : EventArgs
{
    /// <summary>Stores the supplied values; both are exposed unchanged.</summary>
    /// <param name="site">Identifier of the site.</param>
    /// <param name="data">Content associated with the site.</param>
    public SiteScrapedEventArgs(string site, string data)
    {
        Site = site;
        Data = data;
    }

    /// <summary>The <c>site</c> value given to the constructor.</summary>
    public string Site { get; private set; }

    /// <summary>The <c>data</c> value given to the constructor.</summary>
    public string Data { get; private set; }
}
公共类刮板
{
私有只读IEnumerable_站点;
私有只读ConcurrentBag_数据;
私有易失性整数计数;
专用只读整合式;
公共刮刀(IEnumerable站点)
{
_地点=地点;
_数据=新的ConcurrentBag();
_总数=sites.Count();
}
公开作废开始()
{
foreach(var站点在_站点中)
{
现场(现场);
}
}
私有站点(字符串站点)
{
var req=WebRequest.Create(站点);
req.BeginGetResponse(异步回调,req);
}
私有void异步回调(IAsyncResult ar)
{
联锁增量(参考计数);
var req=ar.asynchState作为WebRequest;
var结果=请求EndGetResponse(ar);
var reader=newstreamreader(result.GetResponseStream());
var data=reader.ReadToEnd();
此.OnSiteScraped(req.RequestUri.AbsoluteUri,数据);
_数据。添加(数据);
如果(_count==_total)
{
OnScrapingComplete();
}
}
站点上的私有void已删除(字符串站点、字符串数据)
{
var handler=this.sitesrapped;
if(处理程序!=null)
{
处理程序(此,新站点scrapedeventargs(站点,数据));
}
}
私有void OnScrapingComplete()
{
var handler=this.ScrapingComplete;
if(处理程序!=null)
{
处理程序(新的ScrapingCompletedEventArgs(_数据));
}
}
公共事件事件处理程序站点已删除;
公共事件处理程序ScrapingComplete;
}
公共类SiteScrapedEventArgs:EventArgs
{
公共字符串站点{get;private set;}
公共字符串数据{get;private set;}
公共站点ScrapedEventArgs(字符串站点、字符串数据)
{
this.Site=Site;
这个。数据=数据;
}
}
好的,我创建了一些基本类,这应该可以做到。如果这还不够,很抱歉,我帮不了你:
/// <summary>A site address paired with an integer rank.</summary>
public class RankedPage
{
    /// <summary>Address of the site as a string.</summary>
    public string Site { get; set; }

    /// <summary>Integer rank attached to the page.</summary>
    public int Rank { get; set; }
}
/// <summary>Bundles a web request with the page it was created for.</summary>
public class WebRequestData
{
    /// <summary>The page this request belongs to.</summary>
    public RankedPage Page { get; set; }

    /// <summary>The request object itself.</summary>
    public WebRequest WebRequest { get; set; }
}
/// <summary>
/// Downloads a set of ranked pages with asynchronous web requests, raising
/// <see cref="SiteScraped"/> for each successful download and
/// <see cref="ScrapingComplete"/> exactly once after every request has finished.
/// </summary>
public class Scraper
{
    private readonly List<RankedPage> _sites;
    private readonly ConcurrentBag<KeyValuePair<RankedPage, string>> _data;
    // Number of finished requests; only touched through Interlocked.
    private int _count;
    private readonly int _total;

    /// <summary>Creates a scraper for the given pages.</summary>
    /// <param name="sites">Pages whose <c>Site</c> value is the URI to download.</param>
    public Scraper(IEnumerable<RankedPage> sites)
    {
        // Snapshot once so a lazy sequence is not enumerated twice and the
        // total always matches the number of requests actually started.
        _sites = sites.ToList();
        _data = new ConcurrentBag<KeyValuePair<RankedPage, string>>();
        _total = _sites.Count;
    }

    /// <summary>Starts one asynchronous request per page.</summary>
    public void Start()
    {
        if (_total == 0)
        {
            // Nothing will ever call back, so report completion right away.
            OnScrapingComplete();
            return;
        }

        foreach (var site in _sites)
        {
            ScrapeSite(site);
        }
    }

    // Begins the asynchronous request for a single page.
    private void ScrapeSite(RankedPage site)
    {
        var req = WebRequest.Create(site.Site);
        req.BeginGetResponse(AsyncCallback, new WebRequestData { Page = site, WebRequest = req });
    }

    // Completion callback: stores the page content, then counts the request as done.
    private void AsyncCallback(IAsyncResult ar)
    {
        var webRequestData = (WebRequestData)ar.AsyncState;
        try
        {
            string data;
            if (TryReadResponse(webRequestData.WebRequest, ar, out data))
            {
                this.OnSiteScraped(webRequestData.Page, data);
                _data.Add(new KeyValuePair<RankedPage, string>(webRequestData.Page, data));
            }
        }
        finally
        {
            // Count only after the data is stored, and compare the value returned
            // by Increment: exactly one callback observes the final total.
            if (Interlocked.Increment(ref _count) == _total)
            {
                OnScrapingComplete();
            }
        }
    }

    // Reads the full response body; returns false when the request or the read failed.
    private static bool TryReadResponse(WebRequest req, IAsyncResult ar, out string data)
    {
        try
        {
            using (var response = req.EndGetResponse(ar))
            using (var reader = new StreamReader(response.GetResponseStream()))
            {
                data = reader.ReadToEnd();
                return true;
            }
        }
        catch (WebException)
        {
            // A failed page contributes no data but must still be counted,
            // otherwise ScrapingComplete would never fire.
        }
        catch (IOException)
        {
            // Connection dropped while reading the body; treated like a failed page.
        }

        data = null;
        return false;
    }

    // Raises SiteScraped if anyone is subscribed.
    private void OnSiteScraped(RankedPage page, string data)
    {
        var handler = this.SiteScraped;
        if (handler != null)
        {
            handler(this, new SiteScrapedEventArgs(page, data));
        }
    }

    // Raises ScrapingComplete if anyone is subscribed.
    private void OnScrapingComplete()
    {
        var handler = this.ScrapingComplete;
        if (handler != null)
        {
            handler(this, new ScrapingCompletedEventArgs(_data));
        }
    }

    /// <summary>Raised on a callback thread each time one page has been downloaded.</summary>
    public event EventHandler<SiteScrapedEventArgs> SiteScraped;

    /// <summary>Raised once when every request has finished, successfully or not.</summary>
    public event EventHandler<ScrapingCompletedEventArgs> ScrapingComplete;
}
/// <summary>Event data carrying one page and the text downloaded for it.</summary>
public class SiteScrapedEventArgs : EventArgs
{
    /// <summary>Stores the supplied values; both are exposed unchanged.</summary>
    /// <param name="site">The page that was downloaded.</param>
    /// <param name="data">Content associated with the page.</param>
    public SiteScrapedEventArgs(RankedPage site, string data)
    {
        Site = site;
        Data = data;
    }

    /// <summary>The <c>site</c> value given to the constructor.</summary>
    public RankedPage Site { get; private set; }

    /// <summary>The <c>data</c> value given to the constructor.</summary>
    public string Data { get; private set; }
}
/// <summary>Event data carrying the collected page/content pairs.</summary>
public class ScrapingCompletedEventArgs : EventArgs
{
    /// <summary>Stores the supplied sequence; it is exposed unchanged, not copied.</summary>
    /// <param name="siteData">Pairs of page and downloaded content.</param>
    public ScrapingCompletedEventArgs(IEnumerable<KeyValuePair<RankedPage, string>> siteData)
    {
        SiteData = siteData;
    }

    /// <summary>The <c>siteData</c> sequence given to the constructor.</summary>
    public IEnumerable<KeyValuePair<RankedPage, string>> SiteData { get; private set; }
}
公共类RankedPage
{
公共整数秩{get;set;}
公共字符串站点{get;set;}
}
公共类WebRequestData
{
公共WebRequest WebRequest{get;set;}
公共分级页面{get;set;}
}
公共类刮刀
{
私有只读IEnumerable_站点;
私有只读ConcurrentBag_数据;
私有易失性整数计数;
专用只读整合式;
公共刮刀(IEnumerable站点)
{
_地点=地点;
_数据=新的ConcurrentBag();
_总数=sites.Count();
}
公开作废开始()
{
foreach(var站点在_站点中)
{
现场(现场);
}
}
私人网站(RankedPage网站)
{
var req=WebRequest.Create(site.site);
BeginGetResponse(异步回调,新WebRequestData{Page=site,WebRequest=req});
}
私有void异步回调(IAsyncResult ar)
{
联锁增量(参考计数);
var webRequestData=ar.AsyncState作为webRequestData;
var req=webRequestData.WebRequest;
var结果=请求EndGetResponse(ar);
var reader=newstreamreader(result.GetResponseStream());
var data=reader.ReadToEnd();
此.OnSiteScraped(webRequestData.Page,数据);
_添加(新的KeyValuePair(webRequestData.Page,data));
如果(_count==_total)
{
OnScrapingComplete();
}
}
站点上的私有void已删除(RankedPage页,字符串数据)
{
var handler=this.sitesrapped;
if(处理程序!=null)
{
处理程序(此,新站点scrapedeventargs(页面,数据));
}
}
私有void OnScrapingComplete()
{
var handler=this.ScrapingComplete;
if(处理程序!=null)
{
处理程序(新的ScrapingCompletedEventArgs(_数据));
}
}
公共事件事件处理程序站点已删除;
公共事件处理程序ScrapingComplete;
}
公共类SiteScrapedEventArgs:EventArgs
{
公共RankedPage站点{get;private set;}
公共字符串数据{get;private set;}
公共站点ScrapedEventArgs(RankedPage站点,字符串数据)
{
this.Site=Site;
这个。数据=数据;
}
}
公共类ScrapingCompletedEventArgs:EventArgs
{
公共IEnumerable站点数据{get;private set;}
公共刮削完成前夕