C# 窗体(WebBrowser 控件)间歇性地变成空白
我有一个使用 WebBrowser 控件开发的网络爬虫。有时它工作得很好——它会依次访问:登录页面 → 主页 → 目录菜单页 → 产品详细信息页面 → 返回目录菜单页。但在某些时候,它只到达主页,然后给出一个空白页面,因此获取不到所需的元素 ID。我尝试了下面参考资料中给出的各种建议,但它们都没有帮助。有趣的是,这个问题是间歇性的。有什么办法可以克服这个间歇性问题吗?注意:我无法共享该网站的 URL,因为它是一个内部网站。参考资料 代码 C# 窗体(WebBrowser 控件)间歇性地变成空白,c#,.net,winforms,browser,webbrowser-control,C#,.net,Winforms,Browser,Webbrowser Control,我有一个使用 WebBrowser 控件开发的网络爬虫。有时它工作得很好——它会依次访问:登录页面 → 主页 → 目录菜单页 → 产品详细信息页面 → 返回目录菜单页。但在某些时候,它只到达主页,然后给出一个空白页面,因此获取不到所需的元素 ID。我尝试了下面参考资料中给出的各种建议,但它们都没有帮助。有趣的是,这个问题是间歇性的。有什么办法可以克服这个间歇性问题吗?注意:我无法共享该网站的 URL,因为它是一个内部网站。参考资料 代码 public partial class Form1 : Form {
public partial class Form1 : Form
{
// Embedded browser that loads the site being crawled; created in the constructor.
private WebBrowser wb;
// On-screen list that shows a "Start--" timestamp and the most recent "Now--" timestamp.
private ListBox listBox1;
// Menu link URLs that have already been clicked, in click order.
private List<string> visitedUrls = new List<string>();
// Product URLs already clicked for the current menu entry; replaced with a new list
// whenever a new menu link is opened.
private List<string> visitedProducts = new List<string>();
// True until the login form has been filled in and submitted.
private bool isFirstPage = true;
// Kind of the last simulated click: "Product" or "Menu".
private string clickType = "";
// Whether the current menu page may still contain a product that has not been opened.
private bool isUnvisitedProductExist = true;
/// <summary>
/// Performs one crawl step against the page currently loaded in <c>wb</c>.
/// In order: refreshes the timestamp list, submits the login form if it is present,
/// otherwise opens the next unvisited product on a catalog page, otherwise opens the
/// next menu link. At most one click is simulated per call, because every click
/// starts a navigation and the resulting page must be handled by a later call.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void ExerciseApp(object sender, EventArgs e)
{
    UpdateTimestampList();
    WriteLogFunction(" -----------------------------------------------");

    HtmlDocument document = GetCurrentDocument();
    if (document == null)
    {
        WriteLogFunction("Document is not available; skipping this pass.");
        return;
    }

    // Stop after submitting the login form: the page is about to be replaced, so
    // clicking anything else on it would compete with the login navigation.
    if (isFirstPage && TryLogin(document))
    {
        return;
    }

    if (TryOpenNextProduct(document))
    {
        return;
    }

    // Give a still-loading page a short chance to finish before scanning for menu links.
    WaitForBrowserReady(2);

    // Messages were pumped while waiting, so the document may have been replaced.
    document = GetCurrentDocument();
    if (document == null)
    {
        WriteLogFunction("Document is not available after waiting; skipping this pass.");
        return;
    }

    OpenNextMenu(document);
}

// Returns the browser's current document, or null when the browser is disposed or has no document.
private HtmlDocument GetCurrentDocument()
{
    return wb.IsDisposed ? null : wb.Document;
}

// Keeps listBox1 showing the first ("Start--") timestamp plus the latest ("Now--") one.
private void UpdateTimestampList()
{
    if (listBox1.Items.Count == 0)
    {
        listBox1.Items.Add("Start--" + DateTime.Now.ToString());
        return;
    }

    if (listBox1.Items.Count == 2)
    {
        listBox1.Items.RemoveAt(1);
    }
    listBox1.Items.Add("Now--" + DateTime.Now.ToString());
}

// Fills in and submits the login form when all three login elements are present; returns true if it clicked.
private bool TryLogin(HtmlDocument document)
{
    HtmlElement logonId = document.GetElementById("logonId");
    HtmlElement password = document.GetElementById("logonPassword");
    HtmlElement btnLogin = document.GetElementById("WC_AccountDisplay_links_2");
    if (logonId == null || password == null || btnLogin == null)
    {
        return false;
    }

    logonId.InnerText = ConfigValues.userName;
    password.InnerText = ConfigValues.passwordText;
    isFirstPage = false;
    btnLogin.InvokeMember("click");
    return true;
}

// Clicks the first product image whose href has not been visited yet; returns true if it clicked.
private bool TryOpenNextProduct(HtmlDocument document)
{
    bool isMenuPage = false;
    foreach (HtmlElement element in document.All)
    {
        string id = element.GetAttribute("id");
        if (String.IsNullOrWhiteSpace(id) || !id.Contains("catalogEntry_img"))
        {
            continue;
        }

        isMenuPage = true;
        string productUrl = element.GetAttribute("href");
        if (visitedProducts.Contains(productUrl))
        {
            continue;
        }

        WriteLogFunction("p__" + productUrl);
        isUnvisitedProductExist = true;
        visitedProducts.Add(productUrl);
        clickType = "Product";
        element.InvokeMember("Click");
        return true;
    }

    // The whole page was scanned: it lists products, but every one of them has
    // already been visited, so the crawler may move on to the next menu entry.
    if (isMenuPage)
    {
        isUnvisitedProductExist = false;
    }
    return false;
}

// Pumps the message loop until the browser reports Complete, is disposed, or the timeout elapses.
private void WaitForBrowserReady(double timeoutSeconds)
{
    try
    {
        DateTime start = DateTime.UtcNow;
        while (!wb.IsDisposed && wb.ReadyState != WebBrowserReadyState.Complete)
        {
            // NOTE(review): DoEvents can run other queued callbacks re-entrantly;
            // it is kept out of any DOM enumeration for that reason.
            System.Windows.Forms.Application.DoEvents();
            if (DateTime.UtcNow.Subtract(start).TotalSeconds > timeoutSeconds)
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort only: a failed wait must not abort the crawl step.
        WriteLogFunction(ex.Message);
    }
}

// Clicks the menu link to (re)open: the most recent one while it still has unvisited
// products, otherwise the first link that has never been opened.
private void OpenNextMenu(HtmlDocument document)
{
    HtmlElementCollection elements = document.All;
    WriteLogFunction("Count-" + elements.Count);

    string latestUrl = String.Empty;
    if (visitedUrls.Count > 0)
    {
        latestUrl = visitedUrls[visitedUrls.Count - 1];
    }

    foreach (HtmlElement element in elements)
    {
        string id = element.GetAttribute("id");
        if (String.IsNullOrEmpty(id) || !id.Contains("WC_CachedHeaderDisplay_links"))
        {
            continue;
        }

        WriteLogFunction("*******INSIDE");
        string url = element.GetAttribute("href");
        WriteLogFunction("L__" + latestUrl);
        WriteLogFunction("isUnvisitedProductExist__" + isUnvisitedProductExist.ToString());

        if (visitedUrls.Contains(url))
        {
            // A known menu entry is reopened only when it is the most recent one
            // and it still has products left to visit.
            if (!isUnvisitedProductExist || latestUrl != url)
            {
                continue;
            }
            WriteLogFunction("u1__" + url);
        }
        else
        {
            // New menu entry: start its product bookkeeping from scratch.
            visitedProducts = new List<string>();
            visitedUrls.Add(url);
            WriteLogFunction("u2__" + url);
        }

        clickType = "Menu";
        element.InvokeMember("Click");
        return;
    }
}
/// <summary>
/// Builds the form: a timestamp list box on top and the embedded browser below it,
/// and subscribes <c>Form1_Load</c> to the form's Load event.
/// </summary>
public Form1()
{
    // NOTE(review): InitializeComponent() is not called here; confirm that no
    // designer-generated part of this partial class relies on it.

    // listBox1
    listBox1 = new ListBox();
    listBox1.Location = new Point(10, 10);
    listBox1.Size = new Size(500, 50);
    this.Controls.Add(listBox1);

    // Web Browser
    wb = new WebBrowser();
    wb.Location = new Point(10, 80);
    wb.Size = new Size(900, 900);
    wb.ScriptErrorsSuppressed = true;
    // No URL is assigned here: Form1_Load subscribes to DocumentCompleted and then
    // navigates to ConfigValues.websiteUrl. Assigning wb.Url as well would start a
    // second, redundant navigation before any completion handler is attached.

    // Form1
    this.Text = "Web Browser Test";
    this.Size = new Size(950, 950);
    this.Controls.Add(wb);
    this.Load += Form1_Load;
}
/// <summary>
/// Wires the crawl loop and starts it: after each completed document, waits for the
/// page's window "onload" event and then runs <c>ExerciseApp</c> from a posted
/// callback; finally navigates the browser to <c>ConfigValues.websiteUrl</c>.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void Form1_Load(object sender, EventArgs e)
{
    this.wb.DocumentCompleted += delegate
    {
        HtmlDocument document = this.wb.Document;
        if (document == null || document.Window == null)
        {
            // Nothing to hook; a later DocumentCompleted will try again.
            return;
        }

        // DocumentCompleted is fired before window.onload and body.onload
        // NOTE(review): DocumentCompleted may be raised more than once per page
        // (e.g. for frames), which would attach several handlers — confirm for this site.
        document.Window.AttachEventHandler("onload", delegate
        {
            // Defer this to make sure all possible onload event handlers got fired
            System.Threading.SynchronizationContext.Current.Post(delegate
            {
                // Logged rather than shown in a modal dialog: a message box would
                // stall the unattended crawl on every page until it is dismissed.
                WriteLogFunction("window.onload was fired, can access DOM!");
                ExerciseApp(null, null);
            }, null);
        });
    };
    this.wb.Navigate(ConfigValues.websiteUrl);
}
/// <summary>
/// Appends one entry to "log.txt" (resolved against the current working directory):
/// a line break, the current long time string, a space, the message, a trailing
/// space, and a final line terminator.
/// </summary>
/// <param name="strMessage">Text to record.</param>
private void WriteLogFunction(string strMessage)
{
    string timeStamp = DateTime.Now.ToLongTimeString();
    string entry = String.Format("\r\n{0} {1} ", timeStamp, strMessage);
    File.AppendAllText("log.txt", entry + Environment.NewLine);
}
}
公共部分类表单1:表单
{
private System.Windows.Forms.WebBrowser wb=null;
私有ListBox listBox1=null;
List visitedUrls=新建列表();
列出已访问的产品=新列表();
bool isFirstPage=true;
string clickType=string.Empty;
bool isUnvisitedProductExist=true;
私有void ExerciseApp(对象发送方,事件参数e)
{
#区域列表框数据填充
如果(listBox1.Items.Count==0)
{
listBox1.Items.Add(“开始--”+DateTime.Now.ToString());
}
其他的
{
if(listBox1.Items.Count==2)
{
列表框1.Items.RemoveAt(1);
}
listBox1.Items.Add(“现在--”+DateTime.Now.ToString());
}
#端区
WriteLogFunction(“-------------------------------------------------------------”;
#区域登录
//检查是否登录页面
如果(第一页)
{
HtmlElement logonId=this.wb.Document.GetElementById(“logonId”);
HtmlElement password=this.wb.Document.GetElementById(“logonPassword”);
HtmlElement btnLogin=this.wb.Document.GetElementById(“WC\u AccountDisplay\u links\u 2”);
if(logonId!=null&&password!=null&&btnLogin!=null)
{
logonId.InnerText=ConfigValues.userName;
password.InnerText=ConfigValues.passwordText;
isFirstPage=false;
//呼叫并单击以登录
btnLogin.InvokeMember(“单击”);
}
}
#端区
bool isClickCalled=false;
#特定地区的产品详细信息
int catalogElementIterationCounter=0;
var elementstoconsive=wb.Document.All;
bool-isMenuPage=false;
foreach(元素中的HtmlElement e1考虑)
{
catalogElementIterationCounter++;
字符串x=e1.TagName;
字符串idStr=e1.GetAttribute(“id”);
如果(!String.IsNullOrWhiteSpace(idStr))
{
//每个产品导航
if(idStr.Contains(“目录条目”\u img))
{
isMenuPage=true;
字符串productUrl=e1.GetAttribute(“href”);
如果(!visitedProducts.Contains(productUrl))
{
WriteLogFunction(“p_uuuu”+productUrl);
isUnvisitedProductExist=true;
visitedProducts.Add(productUrl);
isClickCalled=true;
单击type=“产品”;
e1.调用成员(“单击”);
打破
}
}
}
如果(第页)
{
//即使在遍历页面之后,也没有未访问的产品挂起。
//下一页很好
if(catalogElementIterationCounter==ElementsToConserve.Count-1)
{
isUnvisitedProductExist=false;
}
}
}
#端区
#区域菜单页
如果(!isClickCalled)
{
#区域时延
尝试
{
DateTime start=DateTime.Now;
如果(!wb.IsDisposed)
{
while(wb.ReadyState!=webbrowserereadystate.Complete)
{
System.Windows.Forms.Application.DoEvents();
如果(wb.IsDisposed | | DateTime.Now.Subtract(start).TotalSeconds>2)
{
//时限中断和处置中断
打破
}
}
}
}
捕获(例外情况除外)
{
WriteLogFunction(例如消息);
//排除例外
}
#端区
int menuPageIterationCounter=0;
bool isMatchFound=false;
WriteLogFunction(“计数-”+wb.Document.All.Count);
var elementsInMenuPage=wb.Document.All;
foreach(元素菜单页中的HtmlElement e1)
{
menuPageIterationCounter++;
字符串x=e1.TagName;
字符串idStr=e1.GetAttribute(“id”);
WriteLogFunction(“之前--”+idStr);
#区域时延
尝试
{
DateTime start=DateTime.Now;
如果(!wb.IsDisposed)
{
while(wb.ReadyState!=webbrowserereadystate.Complete)
{
System.Windows.Forms.Application.DoEvents();
如果(wb.IsDisposed | | DateTime.Now.Subtract(start).TotalSeconds>50)
{
//时限中断和处置中断
打破
}
}
}
}
捕获(例外情况除外)