
C# Asynchronous WebRequests: Perform an Action When All Requests Are Complete

Tags: c#, asynchronous, webrequest, waithandle

I have a basic scraping console app in C# that uses WebRequest asynchronously to fetch the HTML from a list of sites. It works fine, but how do I set up a trigger that fires once every site in the list has been processed?

I've spent a few hours researching various solutions online, including the MS documentation, but none of them gives a straight answer in code. I've read about IAsyncResult.AsyncWaitHandle, but I have no idea how to integrate it into my code. I just want to call a custom function once all of the threads have finished processing, or have timed out.

One of the tricky parts is that I never know in advance how many sites are in my list (it's user-defined), so I need a solution robust enough to wait on 5 completions as well as 100,000.
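For illustration, the sort of "call something once everything is done, or has timed out" behaviour I'm after could be sketched with a CountdownEvent (available in System.Threading since .NET 4). This is only a sketch with placeholder work, not part of my app:

using System;
using System.Threading;

class CompletionSketch
{
    // Wait for an arbitrary, user-defined number of work items to finish,
    // or give up after a timeout.
    static void RunAll(int siteCount)
    {
        var countdown = new CountdownEvent(siteCount);

        for (int i = 0; i < siteCount; i++)
        {
            ThreadPool.QueueUserWorkItem(_ =>
            {
                // ... fetch and process one site here ...
                countdown.Signal(); // one more site done
            });
        }

        // Returns true if every item signaled before the timeout elapsed.
        if (countdown.Wait(TimeSpan.FromSeconds(30)))
            Console.WriteLine("All sites processed.");
        else
            Console.WriteLine("Timed out with sites still outstanding.");
    }
}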

Thanks. My current working code is below:

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using System.Threading;


namespace AsyncApp_01
{
    class Program
    {
        static void Main(string[] args)
        {
            ArrayList alSites = new ArrayList();
            alSites.Add("http://www.google.com");
            alSites.Add("http://www.lostspires.com");

            ScanSites(alSites);

            Console.Read();
        }

        private static void ScanSites(ArrayList sites)
        {
            foreach (string uriString in sites)
            {
                WebRequest request = HttpWebRequest.Create(uriString);
                request.Method = "GET";
                object data = new object(); //container for our "Stuff"

                // RequestState is a custom class to pass info to the callback
                RequestState state = new RequestState(request, data, uriString);
                IAsyncResult result = request.BeginGetResponse(new AsyncCallback(UpdateItem), state);


                //Register the timeout callback
                ThreadPool.RegisterWaitForSingleObject(result.AsyncWaitHandle, new WaitOrTimerCallback(ScanTimeoutCallback), state, (30 * 1000), true);

            }
        }


        private static void UpdateItem(IAsyncResult result)
        {
            // grab the custom state object
            RequestState state = (RequestState)result.AsyncState;
            WebRequest request = (WebRequest)state.Request;

            // get the Response
            HttpWebResponse response = (HttpWebResponse)request.EndGetResponse(result);
            Stream s = (Stream)response.GetResponseStream();
            StreamReader readStream = new StreamReader(s);

            // dataString will hold the entire contents of the requested page if we need it.
            string dataString = readStream.ReadToEnd();
            response.Close();
            s.Close();
            readStream.Close();

            Console.WriteLine(dataString);
        }


        private static void ScanTimeoutCallback(object state, bool timedOut)
        {
            if (timedOut)
            {
                RequestState reqState = (RequestState)state;
                if (reqState != null)
                {
                    reqState.Request.Abort();
                }
                Console.WriteLine("aborted- timeout");
            }
        } 


        class RequestState
        {
            public WebRequest Request; // holds the request
            public object Data; // store any data in this
            public string SiteUrl; // holds the UrlString to match up results (Database lookup, etc).

            public RequestState(WebRequest request, object data, string siteUrl)
            {
                this.Request = request;
                this.Data = data;
                this.SiteUrl = siteUrl;
            }

        }
    }
}

Bonus points for anyone who can also tell me how to limit the number of concurrent threads. For example, if I have 100 sites to process, how do I set things up so that 10 are being processed at any one time, but no more? I don't want to open 100 threads.
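One common way to cap concurrency with this Begin/End pattern is to gate each BeginGetResponse call on a Semaphore and release it in the callback. The sketch below (class name and the limit of 10 are illustrative only, not taken from the answers) shows the idea:

using System;
using System.IO;
using System.Net;
using System.Threading;

class ThrottledScanner
{
    // At most 10 requests in flight at any one time.
    private static readonly Semaphore Gate = new Semaphore(10, 10);

    public static void ScanSites(string[] sites)
    {
        foreach (var uriString in sites)
        {
            Gate.WaitOne(); // blocks here once 10 requests are already pending
            var request = WebRequest.Create(uriString);
            request.BeginGetResponse(OnResponse, request);
        }
    }

    private static void OnResponse(IAsyncResult ar)
    {
        var request = (WebRequest)ar.AsyncState;
        try
        {
            using (var response = request.EndGetResponse(ar))
            using (var reader = new StreamReader(response.GetResponseStream()))
            {
                Console.WriteLine("{0}: {1} chars", request.RequestUri, reader.ReadToEnd().Length);
            }
        }
        finally
        {
            Gate.Release(); // free a slot so the next site can start
        }
    }
}

Note that requests to the same host are also capped by ServicePointManager.DefaultConnectionLimit, which may need raising for a large batch.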

Here's a quick sample I threw together. I removed the WebClient implementation since you seem to be using WebRequest, and I'm using .NET 4's ConcurrentBag:

public class Scraper
{
    private readonly IEnumerable<string> _sites;
    private readonly ConcurrentBag<string> _data;
    private volatile int _count;
    private readonly int _total;
    public Scraper(IEnumerable<string> sites)
    {
        _sites = sites;
        _data = new ConcurrentBag<string>();
        _total = sites.Count();
    }

    public void Start()
    {
        foreach (var site in _sites)
        {
            ScrapeSite(site);
        }
    }

    private void ScrapeSite(string site)
    {
        var req = WebRequest.Create(site);
        req.BeginGetResponse(AsyncCallback, req);
    }

    private void AsyncCallback(IAsyncResult ar)
    {
        var req = (WebRequest)ar.AsyncState;

        var result = req.EndGetResponse(ar);
        var reader = new StreamReader(result.GetResponseStream());
        var data = reader.ReadToEnd();
        this.OnSiteScraped(req.RequestUri.AbsoluteUri, data);
        _data.Add(data);

        // Increment atomically and test the returned value so that
        // ScrapingComplete fires exactly once, from whichever callback finishes last.
        if (Interlocked.Increment(ref _count) == _total)
        {
            OnScrapingComplete();
        }
    }

    private void OnSiteScraped(string site, string data)
    {
        var handler = this.SiteScraped;
        if (handler != null)
        {
            handler(this, new SiteScrapedEventArgs(site, data));
        }
    }

    private void OnScrapingComplete()
    {
        var handler = this.ScrapingComplete;
        if (handler != null)
        {
            handler(this, new ScrapingCompletedEventArgs(_data));
        }
    }

    public event EventHandler<SiteScrapedEventArgs> SiteScraped;
    public event EventHandler<ScrapingCompletedEventArgs> ScrapingComplete;
}

public class SiteScrapedEventArgs : EventArgs
{
    public string Site { get; private set; }
    public string Data { get; private set; }
    public SiteScrapedEventArgs(string site, string data)
    {
        this.Site = site;
        this.Data = data;
    }
}
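The snippet above raises ScrapingComplete with a ScrapingCompletedEventArgs but never defines that class. A minimal definition consistent with how _data is used here (the property name Data is an assumption) would be:

public class ScrapingCompletedEventArgs : EventArgs
{
    public IEnumerable<string> Data { get; private set; }
    public ScrapingCompletedEventArgs(IEnumerable<string> data)
    {
        this.Data = data;
    }
}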
OK, I created some basic classes; this should do the trick. If this still isn't enough, I'm sorry, I can't help you:

 public class RankedPage
    {
        public int Rank { get; set; }
        public string Site { get; set; }
    }

    public class WebRequestData
    {
        public WebRequest WebRequest { get; set; }
        public RankedPage Page { get; set; }
    }

    public class Scraper
    {
        private readonly IEnumerable<RankedPage> _sites;
        private readonly ConcurrentBag<KeyValuePair<RankedPage,string>> _data;
        private volatile int _count;
        private readonly int _total;
        public Scraper(IEnumerable<RankedPage> sites)
        {
            _sites = sites;
            _data = new ConcurrentBag<KeyValuePair<RankedPage, string>>();
            _total = sites.Count();
        }

        public void Start()
        {
            foreach (var site in _sites)
            {
                ScrapeSite(site);
            }
        }

        private void ScrapeSite(RankedPage site)
        {
            var req = WebRequest.Create(site.Site);
            req.BeginGetResponse(AsyncCallback, new WebRequestData{ Page = site, WebRequest = req});
        }

        private void AsyncCallback(IAsyncResult ar)
        {
            var webRequestData = (WebRequestData)ar.AsyncState;

            var req = webRequestData.WebRequest;
            var result = req.EndGetResponse(ar);
            var reader = new StreamReader(result.GetResponseStream());
            var data = reader.ReadToEnd();
            this.OnSiteScraped(webRequestData.Page, data);
            _data.Add(new KeyValuePair<RankedPage, string>(webRequestData.Page, data));

            // Increment atomically and test the returned value so that
            // ScrapingComplete fires exactly once, from whichever callback finishes last.
            if (Interlocked.Increment(ref _count) == _total)
            {
                OnScrapingComplete();
            }
        }

        private void OnSiteScraped(RankedPage page, string data)
        {
            var handler = this.SiteScraped;
            if (handler != null)
            {
                handler(this, new SiteScrapedEventArgs(page, data));
            }
        }

        private void OnScrapingComplete()
        {
            var handler = this.ScrapingComplete;
            if (handler != null)
            {
                handler(this, new ScrapingCompletedEventArgs(_data));
            }
        }

        public event EventHandler<SiteScrapedEventArgs> SiteScraped;
        public event EventHandler<ScrapingCompletedEventArgs> ScrapingComplete;
    }

    public class SiteScrapedEventArgs : EventArgs
    {
        public RankedPage Site { get; private set; }
        public string Data { get; private set; }
        public SiteScrapedEventArgs(RankedPage site, string data)
        {
            this.Site = site;
            this.Data = data;
        }
    }

    public class ScrapingCompletedEventArgs : EventArgs
    {
        public IEnumerable<KeyValuePair<RankedPage,string >> SiteData { get; private set; }
        public ScrapingCompletedEventArgs(IEnumerable<KeyValuePair<RankedPage, string>> siteData)
        {
            this.SiteData = siteData;
        }
    }
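For completeness, wiring this version into the original console app could look roughly like this (the URLs come from the question; the handler bodies, and the using System.Linq / System.Collections.Generic imports, are assumed):

static void Main(string[] args)
{
    var sites = new List<RankedPage>
    {
        new RankedPage { Rank = 1, Site = "http://www.google.com" },
        new RankedPage { Rank = 2, Site = "http://www.lostspires.com" }
    };

    var scraper = new Scraper(sites);
    scraper.SiteScraped += (s, e) =>
        Console.WriteLine("Scraped {0} ({1} chars)", e.Site.Site, e.Data.Length);
    scraper.ScrapingComplete += (s, e) =>
        Console.WriteLine("All {0} sites done.", e.SiteData.Count());
    scraper.Start();

    Console.Read();
}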