C# 使用XPath和WebBrowser控件选择多个节点
标签:c#, javascript, xpath, webbrowser-control, evaluate
在 C# WinForms 示例应用程序中,我使用 WebBrowser 控件和 javascript-xpath 脚本库选择单个节点,并通过以下代码更改该节点的 innerHTML:
/// <summary>
/// Form load handler: hands the sample product-details page to the embedded
/// WebBrowser control. The page's head references the javascript-xpath script by URL.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void MainForm_Load(object sender, EventArgs e)
{
    // Verbatim literal: each doubled quote below is one escaped double quote.
    const string samplePage = @"
<html>
<head>
<script src=""http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js""></script>
</head>
<body>
<img alt=""0764547763 Product Details""
src=""http://ecx.images-amazon.com/images/I/51AK1MRIi7L._AA160_.jpg"">
<hr/>
<h2>Product Details</h2>
<ul>
<li><b>Paperback:</b> 648 pages</li>
<li><b>Publisher:</b> Wiley; Unlimited Edition edition (October 15, 2001)</li>
<li><b>Language:</b> English</li>
<li><b>ISBN-10:</b> 0764547763</li>
</ul>
</body>
</html>
";

    webBrowser1.DocumentText = samplePage;
}
/// <summary>
/// Click handler: finds the first &lt;li&gt; of the loaded page with an XPath query
/// evaluated inside the browser and re-renders its text in an upper-case green span.
/// Does nothing when no document is loaded or the query yields no element.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void cmdTest_Click(object sender, EventArgs e)
{
    // Guard: there is nothing to query when the control exposes no document.
    if (webBrowser1.Document == null)
    {
        return;
    }

    string xPath = "//li";

    // The query is spliced into a single-quoted JavaScript literal, so escape
    // backslashes and single quotes to keep the generated script well-formed.
    string escapedXPath = xPath.Replace("\\", "\\\\").Replace("'", "\\'");
    string code = string.Format("document.evaluate('{0}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;", escapedXPath);

    // The 'as' cast yields null when the script returns nothing or a value that
    // is not an IHTMLElement; bail out instead of dereferencing null.
    var li = webBrowser1.Document.InvokeScript("eval", new object[] { code }) as mshtml.IHTMLElement;
    if (li == null)
    {
        return;
    }

    // innerText is plain text; encode it before placing it inside markup so
    // characters such as '<' or '&' are not interpreted as HTML.
    string encodedText = System.Net.WebUtility.HtmlEncode(li.innerText);
    li.innerHTML = string.Format("<span style='text-transform: uppercase;font-family:verdana;color:green;'>{0}</span>", encodedText);
}
但是,allLI 变量的返回值是 NULL。
如果我改为这样写:
// Variant of the query: selects every descendant of any <ul> and requests an
// ordered node iterator from document.evaluate instead of a single node.
xPath = "//ul//*";
code = string.Format("document.evaluate('{0}', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);", xPath);
// No cast is applied here, so allLI holds whatever object InvokeScript returns.
var allLI = webBrowser1.Document.InvokeScript("eval", new object[] { code });
那么返回的 allLI 变量不为 null,其值的类型是 COM 对象,但我不清楚该 COM 对象可以转换为哪种更具体的类型。
有没有办法用这里所使用的技术选择多个节点?
[编辑]
已将 xPath = "ul/*" 改为 xPath = "//ul//*"
[添加]
我在示例HTML中添加了两个javaScript函数:
<script type=""text/javascript"">
// Concatenates the innerText of every node matched by an XPath expression.
// XPath: expression string, evaluated against the whole document.
// Returns the accumulated text; it stays an empty string when nothing matches.
// NOTE(review): the doubled quotes below look like C# verbatim-string escapes
// (this script appears to be embedded in a C# string) - confirm before reuse.
function GetElementsText (XPath) {
var xPathRes = document.evaluate ( XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
var nextElement = xPathRes.iterateNext ();
var text = """";
// iterateNext() hands back a falsy value once the iterator is exhausted.
while (nextElement) {
text += nextElement.innerText;
nextElement = xPathRes.iterateNext ();
}
return text;
};
// Collects every node matched by an XPath expression into a plain object.
// XPath: expression string, evaluated against the whole document.
// Returns an object whose keys are 1-based positions (elements[1], elements[2], ...).
// No count property is stored on the result, so a caller has no direct way
// to learn how many entries it holds.
function GetElements (XPath) {
var xPathRes = document.evaluate ( XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
var nextElement = xPathRes.iterateNext ();
var elements = new Object();
// Keys start at 1, not 0.
var elementIndex = 1;
while (nextElement) {
elements[elementIndex++] = nextElement;
nextElement = xPathRes.iterateNext ();
}
return elements;
};
</script>
我得到了所有 li 元素的文本:
"Paperback: 648 pages \r\nPublisher: Wiley; Unlimited Edition edition (October 15, 2001) \r\nLanguage: English \r\nISBN-10: 0764547763 "
这是在 cmdTest_Click 方法中运行以下 C# 代码行时得到的:
// Calls the page-level GetElementsText helper through eval; the result is kept uncast.
var text = webBrowser1.Document.InvokeScript("eval", new object[] { "GetElementsText('//ul')" });
// Calls the page-level GetElements helper the same way; the result is kept uncast.
var elements = webBrowser1.Document.InvokeScript("eval", new object[] { "GetElements('//ul')" });
但我得到的 elements 是一个 COM 对象,无法将其转换为 IEnumerable。
有没有办法在 C# 代码中处理由下面这行调用返回的 HTML 节点 JavaScript 集合:
var elements = webBrowser1.Document.InvokeScript("eval", new object[] { "GetElements('//ul')" });
?我找到了解决方案,下面是代码:
using System;
using System.Collections.Generic;
using System.Reflection;
using System.Windows.Forms;
namespace myTest.WinFormsApp
{
public partial class MainForm : Form
{
/// <summary>
/// Creates the form and calls InitializeComponent, which is defined outside this listing.
/// </summary>
public MainForm()
{
    this.InitializeComponent();
}
/// <summary>
/// Form load handler: loads the sample product-details page into the embedded
/// WebBrowser control.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void MainForm_Load(object sender, EventArgs e)
{
    // The markup is kept well-formed: the list and body are closed explicitly,
    // and an explicit (empty) head is present because the XPath processor in
    // this file appends its script elements to the first <head> element.
    // Verbatim literal: each doubled quote below is one escaped double quote.
    webBrowser1.DocumentText = @"
<html>
<head></head>
<body>
<img alt=""0764547763 Product Details""
src=""http://ecx.images-amazon.com/images/I/51AK1MRIi7L._AA160_.jpg"">
<hr/>
<h2>Product Details</h2>
<ul>
<li><b>Paperback:</b> 648 pages</li>
<li><b>Publisher:</b> Wiley; Unlimited Edition edition (October 15, 2001)</li>
<li><b>Language:</b> English</li>
<li><b>ISBN-10:</b> 0764547763</li>
</ul>
</body>
</html>
";
}
/// <summary>
/// Click handler: re-renders the first list item in green and every later
/// list item in blue, each as upper-case Verdana text.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void cmdTest_Click(object sender, EventArgs e)
{
    var processor = new WebBrowserControlXPathQueriesProcessor(webBrowser1);

    // change attributes of the first element of the list
    RestyleAsUppercase(processor.GetHtmlElement("//li"), "green");

    // change attributes of the second and subsequent elements of the list
    bool isFirst = true;
    foreach (var li in processor.GetHtmlElements("//ul//li"))
    {
        if (isFirst)
        {
            // The first item was already handled above.
            isFirst = false;
            continue;
        }

        RestyleAsUppercase(li, "blue");
    }
}

/// <summary>
/// Replaces an element's content with its own text wrapped in an upper-case,
/// Verdana span of the given colour. A null element is ignored, because the
/// XPath lookups hand back null when a query result is not an IHTMLElement.
/// </summary>
/// <param name="element">Element to rewrite; may be null.</param>
/// <param name="color">CSS colour value placed in the span's style.</param>
private static void RestyleAsUppercase(mshtml.IHTMLElement element, string color)
{
    if (element == null)
    {
        return;
    }

    // innerText is plain text; encode it before placing it inside markup so
    // characters such as '<' or '&' are not interpreted as HTML.
    string encodedText = System.Net.WebUtility.HtmlEncode(element.innerText);
    element.innerHTML = string.Format("<span style='text-transform: uppercase;font-family:verdana;color:{0};'>{1}</span>", color, encodedText);
}
/// <summary>
/// Enables IE WebBrowser control to evaluate XPath queries
/// by injecting http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js
/// and to return XPath queries results to the calling C# code as strongly typed
/// mshtml.IHTMLElement and IEnumerable&lt;mshtml.IHTMLElement&gt;
/// </summary>
public class WebBrowserControlXPathQueriesProcessor
{
    // Location of the XPath library that is attached to the page.
    private const string XPathLibraryUrl = "http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js";

    // Id given to the injected helper script so that a later instance can tell
    // the current document has already been prepared.
    private const string HelperScriptId = "__webBrowserXPathHelpers";

    private readonly System.Windows.Forms.WebBrowser _webBrowser;

    /// <summary>
    /// Wraps a WebBrowser control and prepares its current document for XPath queries.
    /// </summary>
    /// <param name="webBrowser">Control whose document will be queried.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webBrowser"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The control has no document, or the document has neither a head nor a body element.</exception>
    public WebBrowserControlXPathQueriesProcessor(System.Windows.Forms.WebBrowser webBrowser)
    {
        if (webBrowser == null)
        {
            throw new ArgumentNullException("webBrowser");
        }

        _webBrowser = webBrowser;
        injectScripts();
    }

    /// <summary>
    /// Returns the control's current document, failing with a descriptive
    /// exception instead of a NullReferenceException when there is none.
    /// </summary>
    private HtmlDocument GetDocument()
    {
        HtmlDocument document = _webBrowser.Document;
        if (document == null)
        {
            throw new InvalidOperationException("The WebBrowser control has no document loaded.");
        }

        return document;
    }

    /// <summary>
    /// Appends two script elements to the document: one referencing the XPath
    /// library by URL and one defining the GetElement / GetElements helpers.
    /// Does nothing when the helper script is already present in the document.
    /// </summary>
    private void injectScripts()
    {
        // Thanks to: http://stackoverflow.com/questions/7998996/how-to-inject-javascript-in-webbrowser-control
        HtmlDocument document = GetDocument();

        // Instances may be created repeatedly for the same page; inject once per document.
        if (document.GetElementById(HelperScriptId) != null)
        {
            return;
        }

        // Prefer the head element; fall back to the body rather than indexing an empty collection.
        HtmlElementCollection heads = document.GetElementsByTagName("head");
        HtmlElement host = heads.Count > 0 ? heads[0] : document.Body;
        if (host == null)
        {
            throw new InvalidOperationException("The document has no head or body element to attach scripts to.");
        }

        // NOTE(review): the library is referenced by URL; presumably it may not have
        // finished loading by the time the first query runs - confirm.
        HtmlElement libraryScript = document.CreateElement("script");
        mshtml.IHTMLScriptElement libraryElement = (mshtml.IHTMLScriptElement)libraryScript.DomElement;
        libraryElement.src = XPathLibraryUrl;
        host.AppendChild(libraryScript);

        // Helpers are called by name with the XPath passed as an argument, so the
        // query text never has to be spliced into generated script source.
        // GetElements returns an object keyed by 1-based position plus a length property.
        string javaScriptText = @"
function GetElement (XPath) {
    return document.evaluate ( XPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
};
function GetElements (XPath) {
    var xPathRes = document.evaluate ( XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    var nextElement = xPathRes.iterateNext ();
    var elements = new Object();
    var elementIndex = 1;
    while (nextElement) {
        elements[elementIndex++] = nextElement;
        nextElement = xPathRes.iterateNext ();
    }
    elements.length = elementIndex -1;
    return elements;
};
";
        HtmlElement helperScript = document.CreateElement("script");
        helperScript.Id = HelperScriptId;
        mshtml.IHTMLScriptElement helperElement = (mshtml.IHTMLScriptElement)helperScript.DomElement;
        helperElement.text = javaScriptText;
        host.AppendChild(helperScript);
    }

    /// <summary>
    /// Gets Html element's mshtml.IHTMLElement object instance using XPath query
    /// </summary>
    /// <param name="xPathQuery">XPath expression evaluated against the whole document.</param>
    /// <returns>The first matching node as an IHTMLElement, or null when the script
    /// result is not an IHTMLElement.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xPathQuery"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The control has no document.</exception>
    public mshtml.IHTMLElement GetHtmlElement(string xPathQuery)
    {
        if (xPathQuery == null)
        {
            throw new ArgumentNullException("xPathQuery");
        }

        return GetDocument().InvokeScript("GetElement", new object[] { xPathQuery }) as mshtml.IHTMLElement;
    }

    /// <summary>
    /// Gets Html elements' IEnumerable&lt;mshtml.IHTMLElement&gt; object instance using XPath query
    /// </summary>
    /// <param name="xPathQuery">XPath expression evaluated against the whole document.</param>
    /// <returns>A lazily evaluated sequence of the matched nodes in document order;
    /// an entry is null when the matched node is not an IHTMLElement. The query
    /// runs when the sequence is enumerated.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xPathQuery"/> is null.</exception>
    public IEnumerable<mshtml.IHTMLElement> GetHtmlElements(string xPathQuery)
    {
        // Validate eagerly; the iterator body below only runs on enumeration.
        if (xPathQuery == null)
        {
            throw new ArgumentNullException("xPathQuery");
        }

        return EnumerateHtmlElements(xPathQuery);
    }

    /// <summary>
    /// Iterator behind GetHtmlElements: runs the GetElements helper and walks
    /// the returned script object by its 1-based keys.
    /// </summary>
    private IEnumerable<mshtml.IHTMLElement> EnumerateHtmlElements(string xPathQuery)
    {
        // Thanks to: http://stackoverflow.com/questions/5278275/accessing-properties-of-javascript-objects-using-type-dynamic-in-c-sharp-4
        object comObject = GetDocument().InvokeScript("GetElements", new object[] { xPathQuery });
        if (comObject == null)
        {
            // No script result: treat as an empty result set.
            yield break;
        }

        Type type = comObject.GetType();

        // Convert rather than unbox: the boxed numeric type of a script number is not fixed.
        int length = Convert.ToInt32(
            type.InvokeMember("length", BindingFlags.GetProperty, null, comObject, null),
            System.Globalization.CultureInfo.InvariantCulture);

        for (int i = 1; i <= length; i++)
        {
            string key = i.ToString(System.Globalization.CultureInfo.InvariantCulture);
            yield return type.InvokeMember(key, BindingFlags.GetProperty, null, comObject, null) as mshtml.IHTMLElement;
        }
    }
}
}
}
使用系统;
使用System.Collections.Generic;
运用系统反思;
使用System.Windows.Forms;
命名空间myTest.WinFormsApp
{
公共部分类主窗体:窗体
{
公共表格(
{
初始化组件();
}
私有void主窗体加载(对象发送方、事件参数e)
{
webBrowser1.DocumentText=@“
产品详情
- 平装本:648页
- 出版商:Wiley;无限版(2001年10月15日)
- 语言:英语
- ISBN-10:0764547763
";
}
私有void cmdTest_单击(对象发送方,事件参数e)
{
var处理器=新的WebBrowserControlXPathQueriesProcessor(webBrowser1);
//更改列表第一个元素的属性
{
var li=processor.GetHtmlElement(“//li”);
li.innerHTML=string.Format(“{0}”,li.innerText);
}
//更改列表第二个和后续元素的属性
var list=processor.GetHtmlElements(“//ul//li”);
int指数=1;
foreach(列表中的变量li)
{
如果(索引+++==1)继续;
li.innerHTML=string.Format(“{0}”,li.innerText);
}
}
///
///启用IE WebBrowser控件以计算XPath查询
///注入http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js
///并将XPath查询结果作为强类型返回给调用C#代码
///mshtml.IHTMLElement和IEnumerable
///
公共类WebBrowserControlXPathQueriesProcessor
{
private System.Windows.Forms.WebBrowser\u WebBrowser;
公共WebBrowserControlXPathQueriesProcessor(System.Windows.Forms.WebBrowser WebBrowser)
{
_webBrowser=webBrowser;
注入脚本();
}
私有脚本()
{
//感谢:http://stackoverflow.com/questions/7998996/how-to-inject-javascript-in-webbrowser-control
HtmlElement head=_webBrowser.Document.GetElementsByTagName(“head”)[0];
HtmlElement scriptEl=_webBrowser.Document.CreateElement(“脚本”);
mshtml.IHTMLScriptElement=(mshtml.IHTMLScriptElement)scriptEl.doelement;
element.src=”http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js";
头.附肢儿童(scriptEl);
字符串javaScriptText=@”
函数GetElements(XPath){
var xPathRes=document.evaluate(XPath,document,null,XPathResult.ORDERED\u NODE\u ITERATOR\u TYPE,null);
var nextElement=xPathRes.iterateNext();
var元素=新对象();
var elementIndex=1;
while(nextElement){
元素[elementIndex++]=nextElement;
nextElement=xPathRes.iterateNext();
}
elements.length=elementIndex-1;
返回元素;
};
";
scriptEl=_webBrowser.Document.CreateElement(“脚本”);
元素=(mshtml.IHTMLScriptElement)scriptEl.doElement;
element.text=javaScriptText;
头.附肢儿童(scriptEl);
}
///
///使用XPath查询获取Html元素的mshtml.IHTMLElement对象实例
///
public mshtml.IHTMLElement GetHtmlElement(字符串xPathQuery)
{
string code=string.Format(“document.evaluate({0}),document,null,XPathResult.FIRST\u ORDERED\u NODE\u TYPE,null)。singleNodeValue;”,xPathQuery);
将_webBrowser.Document.InvokeScript(“eval”,新对象[]{code})作为mshtml.IHTMLElement返回;
}
///
///使用XPath查询获取Html元素的IEnumerable对象实例
///
公共IEnumerable GetHtmlElements(字符串xPathQuery)
{
//感谢:http://stackoverflow.com/questions/5278275/accessing-properties-of-javascript-objects-using-type-dynamic-in-c-sharp-4
var comObject=_webBrowser.Document.InvokeScript(“eval”,新对象[]{string.Format(“GetElements({0}')”,xPathQuery)});
Type Type=comObject.GetType();
int length=(int)type.InvokeMember(“长度”,BindingFlags.GetProperty,null,comObject,null);
对于(inti=1;
评论:我能帮上忙吗?
@Noseratio:我想避免使用 HTML Agility Pack——我想直接使用
using System;
using System.Collections.Generic;
using System.Reflection;
using System.Windows.Forms;
namespace myTest.WinFormsApp
{
public partial class MainForm : Form
{
/// <summary>
/// Creates the form and calls InitializeComponent, which is defined outside this listing.
/// </summary>
public MainForm()
{
    this.InitializeComponent();
}
/// <summary>
/// Form load handler: loads the sample product-details page into the embedded
/// WebBrowser control.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void MainForm_Load(object sender, EventArgs e)
{
    // The markup is kept well-formed: the list and body are closed explicitly,
    // and an explicit (empty) head is present because the XPath processor in
    // this file appends its script elements to the first <head> element.
    // Verbatim literal: each doubled quote below is one escaped double quote.
    webBrowser1.DocumentText = @"
<html>
<head></head>
<body>
<img alt=""0764547763 Product Details""
src=""http://ecx.images-amazon.com/images/I/51AK1MRIi7L._AA160_.jpg"">
<hr/>
<h2>Product Details</h2>
<ul>
<li><b>Paperback:</b> 648 pages</li>
<li><b>Publisher:</b> Wiley; Unlimited Edition edition (October 15, 2001)</li>
<li><b>Language:</b> English</li>
<li><b>ISBN-10:</b> 0764547763</li>
</ul>
</body>
</html>
";
}
/// <summary>
/// Click handler: re-renders the first list item in green and every later
/// list item in blue, each as upper-case Verdana text.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void cmdTest_Click(object sender, EventArgs e)
{
    var processor = new WebBrowserControlXPathQueriesProcessor(webBrowser1);

    // change attributes of the first element of the list
    RestyleAsUppercase(processor.GetHtmlElement("//li"), "green");

    // change attributes of the second and subsequent elements of the list
    bool isFirst = true;
    foreach (var li in processor.GetHtmlElements("//ul//li"))
    {
        if (isFirst)
        {
            // The first item was already handled above.
            isFirst = false;
            continue;
        }

        RestyleAsUppercase(li, "blue");
    }
}

/// <summary>
/// Replaces an element's content with its own text wrapped in an upper-case,
/// Verdana span of the given colour. A null element is ignored, because the
/// XPath lookups hand back null when a query result is not an IHTMLElement.
/// </summary>
/// <param name="element">Element to rewrite; may be null.</param>
/// <param name="color">CSS colour value placed in the span's style.</param>
private static void RestyleAsUppercase(mshtml.IHTMLElement element, string color)
{
    if (element == null)
    {
        return;
    }

    // innerText is plain text; encode it before placing it inside markup so
    // characters such as '<' or '&' are not interpreted as HTML.
    string encodedText = System.Net.WebUtility.HtmlEncode(element.innerText);
    element.innerHTML = string.Format("<span style='text-transform: uppercase;font-family:verdana;color:{0};'>{1}</span>", color, encodedText);
}
/// <summary>
/// Enables IE WebBrowser control to evaluate XPath queries
/// by injecting http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js
/// and to return XPath queries results to the calling C# code as strongly typed
/// mshtml.IHTMLElement and IEnumerable&lt;mshtml.IHTMLElement&gt;
/// </summary>
public class WebBrowserControlXPathQueriesProcessor
{
    // Location of the XPath library that is attached to the page.
    private const string XPathLibraryUrl = "http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js";

    // Id given to the injected helper script so that a later instance can tell
    // the current document has already been prepared.
    private const string HelperScriptId = "__webBrowserXPathHelpers";

    private readonly System.Windows.Forms.WebBrowser _webBrowser;

    /// <summary>
    /// Wraps a WebBrowser control and prepares its current document for XPath queries.
    /// </summary>
    /// <param name="webBrowser">Control whose document will be queried.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webBrowser"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The control has no document, or the document has neither a head nor a body element.</exception>
    public WebBrowserControlXPathQueriesProcessor(System.Windows.Forms.WebBrowser webBrowser)
    {
        if (webBrowser == null)
        {
            throw new ArgumentNullException("webBrowser");
        }

        _webBrowser = webBrowser;
        injectScripts();
    }

    /// <summary>
    /// Returns the control's current document, failing with a descriptive
    /// exception instead of a NullReferenceException when there is none.
    /// </summary>
    private HtmlDocument GetDocument()
    {
        HtmlDocument document = _webBrowser.Document;
        if (document == null)
        {
            throw new InvalidOperationException("The WebBrowser control has no document loaded.");
        }

        return document;
    }

    /// <summary>
    /// Appends two script elements to the document: one referencing the XPath
    /// library by URL and one defining the GetElement / GetElements helpers.
    /// Does nothing when the helper script is already present in the document.
    /// </summary>
    private void injectScripts()
    {
        // Thanks to: http://stackoverflow.com/questions/7998996/how-to-inject-javascript-in-webbrowser-control
        HtmlDocument document = GetDocument();

        // Instances may be created repeatedly for the same page; inject once per document.
        if (document.GetElementById(HelperScriptId) != null)
        {
            return;
        }

        // Prefer the head element; fall back to the body rather than indexing an empty collection.
        HtmlElementCollection heads = document.GetElementsByTagName("head");
        HtmlElement host = heads.Count > 0 ? heads[0] : document.Body;
        if (host == null)
        {
            throw new InvalidOperationException("The document has no head or body element to attach scripts to.");
        }

        // NOTE(review): the library is referenced by URL; presumably it may not have
        // finished loading by the time the first query runs - confirm.
        HtmlElement libraryScript = document.CreateElement("script");
        mshtml.IHTMLScriptElement libraryElement = (mshtml.IHTMLScriptElement)libraryScript.DomElement;
        libraryElement.src = XPathLibraryUrl;
        host.AppendChild(libraryScript);

        // Helpers are called by name with the XPath passed as an argument, so the
        // query text never has to be spliced into generated script source.
        // GetElements returns an object keyed by 1-based position plus a length property.
        string javaScriptText = @"
function GetElement (XPath) {
    return document.evaluate ( XPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
};
function GetElements (XPath) {
    var xPathRes = document.evaluate ( XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    var nextElement = xPathRes.iterateNext ();
    var elements = new Object();
    var elementIndex = 1;
    while (nextElement) {
        elements[elementIndex++] = nextElement;
        nextElement = xPathRes.iterateNext ();
    }
    elements.length = elementIndex -1;
    return elements;
};
";
        HtmlElement helperScript = document.CreateElement("script");
        helperScript.Id = HelperScriptId;
        mshtml.IHTMLScriptElement helperElement = (mshtml.IHTMLScriptElement)helperScript.DomElement;
        helperElement.text = javaScriptText;
        host.AppendChild(helperScript);
    }

    /// <summary>
    /// Gets Html element's mshtml.IHTMLElement object instance using XPath query
    /// </summary>
    /// <param name="xPathQuery">XPath expression evaluated against the whole document.</param>
    /// <returns>The first matching node as an IHTMLElement, or null when the script
    /// result is not an IHTMLElement.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xPathQuery"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The control has no document.</exception>
    public mshtml.IHTMLElement GetHtmlElement(string xPathQuery)
    {
        if (xPathQuery == null)
        {
            throw new ArgumentNullException("xPathQuery");
        }

        return GetDocument().InvokeScript("GetElement", new object[] { xPathQuery }) as mshtml.IHTMLElement;
    }

    /// <summary>
    /// Gets Html elements' IEnumerable&lt;mshtml.IHTMLElement&gt; object instance using XPath query
    /// </summary>
    /// <param name="xPathQuery">XPath expression evaluated against the whole document.</param>
    /// <returns>A lazily evaluated sequence of the matched nodes in document order;
    /// an entry is null when the matched node is not an IHTMLElement. The query
    /// runs when the sequence is enumerated.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xPathQuery"/> is null.</exception>
    public IEnumerable<mshtml.IHTMLElement> GetHtmlElements(string xPathQuery)
    {
        // Validate eagerly; the iterator body below only runs on enumeration.
        if (xPathQuery == null)
        {
            throw new ArgumentNullException("xPathQuery");
        }

        return EnumerateHtmlElements(xPathQuery);
    }

    /// <summary>
    /// Iterator behind GetHtmlElements: runs the GetElements helper and walks
    /// the returned script object by its 1-based keys.
    /// </summary>
    private IEnumerable<mshtml.IHTMLElement> EnumerateHtmlElements(string xPathQuery)
    {
        // Thanks to: http://stackoverflow.com/questions/5278275/accessing-properties-of-javascript-objects-using-type-dynamic-in-c-sharp-4
        object comObject = GetDocument().InvokeScript("GetElements", new object[] { xPathQuery });
        if (comObject == null)
        {
            // No script result: treat as an empty result set.
            yield break;
        }

        Type type = comObject.GetType();

        // Convert rather than unbox: the boxed numeric type of a script number is not fixed.
        int length = Convert.ToInt32(
            type.InvokeMember("length", BindingFlags.GetProperty, null, comObject, null),
            System.Globalization.CultureInfo.InvariantCulture);

        for (int i = 1; i <= length; i++)
        {
            string key = i.ToString(System.Globalization.CultureInfo.InvariantCulture);
            yield return type.InvokeMember(key, BindingFlags.GetProperty, null, comObject, null) as mshtml.IHTMLElement;
        }
    }
}
}
}