Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/314.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 这是CookieContainer bug吗?_C#_Cookiecontainer - Fatal编程技术网

C# 这是CookieContainer bug吗?

C# 这是CookieContainer bug吗?,c#,cookiecontainer,C#,Cookiecontainer,我在做什么: 我正在开发一个“webscraper”(多线程),就是这样,lol.在从页面提取数据之前,我需要提交一个表单,因此布局如下: 获取对example.com/path/doc.jsp(我的数据)的请求 检查确认单是否存在于单据来源中。如果是,则继续执行步骤3(我的数据不存在,需要先提交表格),否则返回(因为没有要提交的表格,我的数据在这里) 获取对example.com/path/sub/other.jsp的请求(必要的键值) 将请求发布到example.com/path/submi

我在做什么:
我正在开发一个“webscraper”(多线程),就是这样,lol.
在从页面提取数据之前,我需要提交一个表单,因此布局如下:

  • 向 example.com/path/doc.jsp 发送 GET 请求(我的数据所在的页面)
  • 检查页面源代码中是否存在确认表单。如果存在,则继续执行步骤3(我的数据尚未显示,需要先提交表单);否则直接返回(因为没有要提交的表单,我的数据已经在这里)
  • 向 example.com/path/sub/other.jsp 发送 GET 请求(获取必要的键值)
  • 向 example.com/path/submit.jsp 发送 POST 请求(提交各项值)
  • 检查POST请求的响应,如果正常,则转到步骤6,否则返回步骤1
  • 再次向 example.com/path/doc.jsp 发送 GET 请求(同样是我的数据。由于我已提交表单,现在我的数据会显示出来)
  • 一切正常,只有当POST请求(第4步)的响应让我返回第1步时才会出问题。

    问题:
    我需要从cookies中提取表单中的一个值,因此我使用
    GetCookies()
    函数,但是,正如我所说的,如果响应告诉我返回步骤1,那么之后的所有请求(包括GET和POST)都会丢失cookies(并添加奇怪的cookies)。请参见下图:


    图像说明:

    public class CWeb : IDisposable
    {
        private WebClientEx _wc;
        private string _originalUrl;
    
        public CWeb()
        {
            _wc = new WebClientEx(new CookieContainer());
        }
    
        public string downloadPage(string url)
        {
            _originalUrl = url;
            string pgSrc = "error";
            int tries = 0;
    
            while (tries < 3 && pgSrc == "error)
            {
                try
                {
                    pgSrc = _wc.DownloadString(url);
                }
                catch (Exception err)
                {
                    tries += 1;
                    pgSrc = "error";
                    ...
                }
            }
    
            if (needSubmit(pgSrc)) // needSubmit just peform IndexOf on pgSrc
                do
                {
                    pgSrc = sendForm(pgSrc);
                } while (needSubmit(pgSrc));
    
            return WebUtility.HtmlDecode(pgSrc);
        }
    
        public string sendForm(pageSource)
        {
            // 1- Get Cookie Value
            string cookie = _wc.CookieContainer.GetCookies(new Uri(_originalUrl))["JSESSIONID"].Value;
    
            // 2- Get hidden values in pageSource parameter
            // skip this, since there's no web request here, only some html parsing
            // with Html Agility Pack
            ...
    
            // 3- Get key value
            string tmpStr = _wc.DownloadString("http://example.com/path/sub/other.jsp");
            ... more html parsing ...
    
            // 4- Build form
            NameValueCollection nvc = new NameValueCollection();
            nvc["param1"] = cookie;
            nvc["param2"] = key;
            ...
    
            // 5- Send
            _wc.UploadValues("example.com/path/submit.jsp", nvc);
    
            // 6- Return
            return _wc.DownloadString(_originalUrl);
        }
    
        public void Dispose()
        {
            _wc.Dispose();
        }
    }
    
    static void Main(string[] args)
    {
        // Load tons of 'doc' url list from database...
        List<string> urls = new List<string>();
        ...
    
        Parallel.ForEach(urls, (url) =>
            {
                using (CWeb crawler = new CWeb())
                {
                    string pageData = crawler.downloadPage(url);
                    ... parse html data here ...
                }
            });
    }
    
    • 第一个调用是对doc.jsp的GET请求,我的数据在这里
    • 第二个调用是other.jsp请求,因为确认表单出现在doc.jsp源代码中
    • 第三个调用是当我提交所有值时
    • 第四个调用是对doc.jsp的GET请求,因为提交表单的响应(第三个调用)告诉我重复这个过程。基本上,4º~6º调用与1º~3º调用相同,但使用cookies

    我的代码:

    /// <summary>
    /// Downloads a page and, while the returned source still contains the
    /// confirmation form (as reported by needSubmit), submits that form and
    /// requests the page again. Every request goes through one WebClientEx that
    /// was constructed with a single CookieContainer.
    /// </summary>
    public class CWeb : IDisposable
    {
        private WebClientEx _wc;
        private string _originalUrl;
    
        /// <summary>Creates the crawler with a new, empty cookie container.</summary>
        public CWeb()
        {
            _wc = new WebClientEx(new CookieContainer());
        }
    
        /// <summary>
        /// Downloads <paramref name="url"/>, retrying while the download throws
        /// (at most three failures), then submits the confirmation form for as
        /// long as needSubmit reports it in the page source.
        /// </summary>
        /// <param name="url">Absolute URL of the document page.</param>
        /// <returns>
        /// The HTML-decoded page source. If all three attempts failed, the
        /// placeholder string "error" is what gets processed and returned.
        /// </returns>
        public string downloadPage(string url)
        {
            _originalUrl = url;
            string pgSrc = "error";
            int tries = 0;
    
            // Fixed: the original condition had an unterminated string literal ("error).
            while (tries < 3 && pgSrc == "error")
            {
                try
                {
                    pgSrc = _wc.DownloadString(url);
                }
                catch (Exception err)
                {
                    tries += 1;
                    pgSrc = "error";
                    ...
                }
            }
    
            // Same semantics as the original if + do/while pair, written as one loop.
            // NOTE(review): there is no upper bound here; if the server keeps
            // answering with the form this never terminates - consider a cap.
            while (needSubmit(pgSrc)) // needSubmit just performs IndexOf on pgSrc
            {
                pgSrc = sendForm(pgSrc);
            }
    
            return WebUtility.HtmlDecode(pgSrc);
        }
    
        /// <summary>
        /// Submits the confirmation form found in <paramref name="pageSource"/>
        /// and downloads the original document URL again.
        /// </summary>
        /// <param name="pageSource">Source of the page that contains the form.</param>
        /// <returns>The page source returned by re-requesting the original URL.</returns>
        /// <exception cref="InvalidOperationException">
        /// No JSESSIONID cookie is stored for the original URL.
        /// </exception>
        public string sendForm(string pageSource)
        {
            // 1- Get Cookie Value
            // The name indexer yields null when no such cookie applies to the URI;
            // fail with a descriptive error instead of a NullReferenceException.
            Cookie sessionCookie = _wc.CookieContainer.GetCookies(new Uri(_originalUrl))["JSESSIONID"];
            if (sessionCookie == null)
            {
                throw new InvalidOperationException("No JSESSIONID cookie is stored for " + _originalUrl);
            }
            string cookie = sessionCookie.Value;
    
            // 2- Get hidden values in pageSource parameter
            // skip this, since there's no web request here, only some html parsing
            // with Html Agility Pack
            ...
    
            // 3- Get key value
            string tmpStr = _wc.DownloadString("http://example.com/path/sub/other.jsp");
            ... more html parsing ...
    
            // 4- Build form
            NameValueCollection nvc = new NameValueCollection();
            nvc["param1"] = cookie;
            nvc["param2"] = key;
            ...
    
            // 5- Send
            // Fixed: the address needs its scheme; without "http://" it is not an
            // absolute URI and WebClient does not treat it as a web address.
            _wc.UploadValues("http://example.com/path/submit.jsp", nvc);
    
            // 6- Return
            return _wc.DownloadString(_originalUrl);
        }
    
        /// <summary>Disposes the underlying web client.</summary>
        public void Dispose()
        {
            _wc.Dispose();
        }
    }
    
    static void Main(string[] args)
    {
        // Load tons of 'doc' url list from database...
        List<string> urls = new List<string>();
        ...
    
        Parallel.ForEach(urls, (url) =>
            {
                using (CWeb crawler = new CWeb())
                {
                    string pageData = crawler.downloadPage(url);
                    ... parse html data here ...
                }
            });
    }
    
    公共类CWeb:IDisposable
    {
    私人网络客户端;
    私有字符串_originalUrl;
    公共CWeb()
    {
    _wc=新的WebClientEx(新的CookieContainer());
    }
    公共字符串下载页面(字符串url)
    {
    _originalUrl=url;
    字符串pgSrc=“error”;
    int=0;
    while(尝试<3&&pgSrc==”错误)
    {
    尝试
    {
    pgSrc=_wc.DownloadString(url);
    }
    捕获(异常错误)
    {
    尝试次数+=1;
    pgSrc=“错误”;
    ...
    }
    }
    if(needSubmit(pgSrc))//needSubmit只需在pgSrc上设置IndexOf
    做
    {
    pgSrc=sendForm(pgSrc);
    }while(needSubmit(pgSrc));
    返回WebUtility.HtmlDecode(pgSrc);
    }
    公共字符串sendForm(pageSource)
    {
    //1-获取Cookie值
    字符串cookie=_wc.CookieContainer.GetCookies(新Uri(_originalUrl))[“JSESSIONID”].Value;
    //2-获取pageSource参数中的隐藏值
    //跳过这个,因为这里没有web请求,只有一些html解析
    //使用Html敏捷包
    ...
    //3-获取关键值
    字符串tmpStr=_wc.DownloadString(“http://example.com/path/sub/other.jsp");
    …更多html解析。。。
    //4-建造形式
    NameValueCollection nvc=新的NameValueCollection();
    nvc[“param1”]=cookie;
    nvc[“参数2”]=键;
    ...
    //5-发送
    _UploadValues(“example.com/path/submit.jsp”,nvc);
    //6-返回
    返回_wc.DownloadString(_originalUrl);
    }
    公共空间处置()
    {
    _wc.Dispose();
    }
    }
    

    主程序:

    public class CWeb : IDisposable
    {
        private WebClientEx _wc;
        private string _originalUrl;
    
        public CWeb()
        {
            _wc = new WebClientEx(new CookieContainer());
        }
    
        public string downloadPage(string url)
        {
            _originalUrl = url;
            string pgSrc = "error";
            int tries = 0;
    
            while (tries < 3 && pgSrc == "error)
            {
                try
                {
                    pgSrc = _wc.DownloadString(url);
                }
                catch (Exception err)
                {
                    tries += 1;
                    pgSrc = "error";
                    ...
                }
            }
    
            if (needSubmit(pgSrc)) // needSubmit just peform IndexOf on pgSrc
                do
                {
                    pgSrc = sendForm(pgSrc);
                } while (needSubmit(pgSrc));
    
            return WebUtility.HtmlDecode(pgSrc);
        }
    
        public string sendForm(pageSource)
        {
            // 1- Get Cookie Value
            string cookie = _wc.CookieContainer.GetCookies(new Uri(_originalUrl))["JSESSIONID"].Value;
    
            // 2- Get hidden values in pageSource parameter
            // skip this, since there's no web request here, only some html parsing
            // with Html Agility Pack
            ...
    
            // 3- Get key value
            string tmpStr = _wc.DownloadString("http://example.com/path/sub/other.jsp");
            ... more html parsing ...
    
            // 4- Build form
            NameValueCollection nvc = new NameValueCollection();
            nvc["param1"] = cookie;
            nvc["param2"] = key;
            ...
    
            // 5- Send
            _wc.UploadValues("example.com/path/submit.jsp", nvc);
    
            // 6- Return
            return _wc.DownloadString(_originalUrl);
        }
    
        public void Dispose()
        {
            _wc.Dispose();
        }
    }
    
    // Entry point: processes every document URL in parallel, creating and
    // disposing one CWeb instance per URL.
    static void Main(string[] args)
    {
        // Load tons of 'doc' url list from database...
        var urls = new List<string>();
        ...
    
        Parallel.ForEach(urls, url =>
        {
            using (var crawler = new CWeb())
            {
                string pageData = crawler.downloadPage(url);
                ... parse html data here ...
            }
        });
    }
    
    static void Main(字符串[]args)
    {
    //从数据库加载成吨的“文档”url列表。。。
    列表URL=新列表();
    ...
    Parallel.ForEach(url,(url)=>
    {
    使用(CWeb爬虫程序=新CWeb())
    {
    字符串pageData=crawler.downloadPage(url);
    …在此解析html数据。。。
    }
    });
    }
    

    我的环境:

    public class CWeb : IDisposable
    {
        private WebClientEx _wc;
        private string _originalUrl;
    
        public CWeb()
        {
            _wc = new WebClientEx(new CookieContainer());
        }
    
        public string downloadPage(string url)
        {
            _originalUrl = url;
            string pgSrc = "error";
            int tries = 0;
    
            while (tries < 3 && pgSrc == "error)
            {
                try
                {
                    pgSrc = _wc.DownloadString(url);
                }
                catch (Exception err)
                {
                    tries += 1;
                    pgSrc = "error";
                    ...
                }
            }
    
            if (needSubmit(pgSrc)) // needSubmit just peform IndexOf on pgSrc
                do
                {
                    pgSrc = sendForm(pgSrc);
                } while (needSubmit(pgSrc));
    
            return WebUtility.HtmlDecode(pgSrc);
        }
    
        public string sendForm(pageSource)
        {
            // 1- Get Cookie Value
            string cookie = _wc.CookieContainer.GetCookies(new Uri(_originalUrl))["JSESSIONID"].Value;
    
            // 2- Get hidden values in pageSource parameter
            // skip this, since there's no web request here, only some html parsing
            // with Html Agility Pack
            ...
    
            // 3- Get key value
            string tmpStr = _wc.DownloadString("http://example.com/path/sub/other.jsp");
            ... more html parsing ...
    
            // 4- Build form
            NameValueCollection nvc = new NameValueCollection();
            nvc["param1"] = cookie;
            nvc["param2"] = key;
            ...
    
            // 5- Send
            _wc.UploadValues("example.com/path/submit.jsp", nvc);
    
            // 6- Return
            return _wc.DownloadString(_originalUrl);
        }
    
        public void Dispose()
        {
            _wc.Dispose();
        }
    }
    
    static void Main(string[] args)
    {
        // Load tons of 'doc' url list from database...
        List<string> urls = new List<string>();
        ...
    
        Parallel.ForEach(urls, (url) =>
            {
                using (CWeb crawler = new CWeb())
                {
                    string pageData = crawler.downloadPage(url);
                    ... parse html data here ...
                }
            });
    }
    
    • 使用Visual Studio Professional 2013
    • 目标框架是.NETFramework 4.5
    • 平台x86(调试)
    • WebClientEx 是 WebClient 的扩展版本,用于处理 Cookie(参见此处)。我曾尝试实现
      BugFix_CookieDomain()
      (来自此处),但即使使用该修复程序,此问题仍会发生
    • 我所有的url都包含http://前缀

    • 使用Fiddler查看请求信息

    • 英语不是我的母语…“-”
    我使用过与您正在做的事情类似的功能。它使用 HttpWebRequest(WebRequest 的子类),通过一个名为 CookieContainer 的属性来处理 cookies。我注意到 cookies 会被添加到 cookie 容器中,显然也会从 cookie 容器中被删除。我相信这完全由服务器端(即您向其发出请求的 web 应用)控制。服务器还能够添加其他 cookie

    此外,Cookie 带有过期日期、Discard(丢弃)标志和域:当过期日期已过、服务器设置了 Discard 标志或域发生更改时,适用的 Cookie 列表就可能发生变化

    不确定这是否有用,但我尝试了