使用XPath和WebBrowser Control选择多个节点

在一个C# WinForms示例应用程序中，我使用WebBrowser控件和JavaScript-XPath来选择单个节点，并通过以下代码更改该节点的innerHTML：

/// <summary>
/// Loads the sample "Product Details" markup into the WebBrowser control.
/// </summary>
private void MainForm_Load(object sender, EventArgs e)
{
    // NOTE(review): the markup inside this literal appears to have lost its HTML tags
    // when the post was extracted - confirm against the original before running.
    webBrowser1.DocumentText = @"       

Product Details

  • Paperback: 648 pages
  • Publisher: Wiley; Unlimited Edition edition (October 15, 2001)
  • Language: English
  • ISBN-10: 0764547763
";
}

/// <summary>
/// Selects the first node matching the XPath query through the page's
/// document.evaluate and rewrites its innerHTML from its innerText.
/// </summary>
private void cmdTest_Click(object sender, EventArgs e)
{
    string xPath = "//li";
    string code = string.Format(
        "document.evaluate('{0}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;",
        xPath);

    var li = webBrowser1.Document.InvokeScript("eval", new object[] { code }) as mshtml.IHTMLElement;

    // The "as" cast yields null when the script result is not an IHTMLElement
    // (for example when nothing matches), so guard before dereferencing.
    if (li == null)
    {
        return;
    }

    // NOTE(review): the format string looks like it originally wrapped {0} in markup - confirm.
    li.innerHTML = string.Format("{0}", li.innerText);
}

运行此代码的结果如下:

示例代码执行结果

现在我想使用相同的技术选择 ul 节点下的多个 li 节点，于是我写了如下代码：

        // Build an iterator-type XPath query and evaluate it inside the page.
        xPath = "//ul//*";
        code = string.Format(
            "document.evaluate('{0}', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);",
            xPath);
        object[] scriptArgs = { code };
        // The "as" cast produces null when the returned object is not an IHTMLElementCollection.
        var allLI = webBrowser1.Document.InvokeScript("eval", scriptArgs) as mshtml.IHTMLElementCollection;

      但是allLI变量的值为null。

      如果我改为这样写：

        // Same query as before, but the raw script result is kept without any cast.
        xPath = "//ul//*";
        code = string.Format(
            "document.evaluate('{0}', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);",
            xPath);
        var page = webBrowser1.Document;
        var allLI = page.InvokeScript("eval", new object[] { code });

      那么返回的allLI变量不为null，它的值类型是COM Object，但我还不清楚这个COM Object能否转换为更具体的类型。

      有没有办法用这里的技术选择多个节点？

      [已编辑]

      xPath = "ul//*";

      xPath = "//ul//*";

      [补充]

      我在示例HTML中添加了两个JavaScript函数：

        // Returns the concatenated innerText of every node matched by the XPath expression.
        function GetElementsText (XPath) {
            var xPathRes = document.evaluate(
                XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
            // Single quotes are valid both as plain JavaScript and when this script is
            // embedded in a C# verbatim string (where a doubled double-quote would be needed).
            var text = '';
            for (var node = xPathRes.iterateNext(); node; node = xPathRes.iterateNext()) {
                text += node.innerText;
            }
            return text;
        };

        // Returns an object whose properties 1..n hold the nodes matched by the
        // XPath expression, in iteration order.
        function GetElements (XPath) {
            var xPathRes = document.evaluate(
                XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
            var elements = new Object();
            var elementIndex = 1;
            for (var node = xPathRes.iterateNext(); node; node = xPathRes.iterateNext()) {
                elements[elementIndex++] = node;
            }
            return elements;
        };

      现在，当我在cmdTest_Click方法中运行以下C#代码行时：

       // Evaluate the page's GetElementsText helper for the //ul query and keep its return value.
       object[] getTextArgs = { "GetElementsText('//ul')" };
       var text = webBrowser1.Document.InvokeScript("eval", getTextArgs);

      我得到了所有li元素的文本：

       "Paperback: 648 pages \r\nPublisher: Wiley; Unlimited Edition edition (October 15, 2001) \r\nLanguage: English \r\nISBN-10: 0764547763 " 

      而当我在cmdTest_Click方法中运行以下C#代码行时：

       // Evaluate the page's GetElements helper for the //ul query; the result is left untyped.
       object[] getElementsArgs = { "GetElements('//ul')" };
       var elements = webBrowser1.Document.InvokeScript("eval", getElementsArgs);

      我得到的是一个COM Object，无法将其转换为IEnumerable。

      有没有办法在C#代码中处理由以下调用返回的HTML节点的JavaScript集合？

       // The call in question: eval of the GetElements helper, returning a script object.
       const string script = "GetElements('//ul')";
       var elements = webBrowser1.Document.InvokeScript("eval", new object[] { script });

      我找到了解决方案,这里是代码:

       using System;
       using System.Collections.Generic;
       using System.Globalization;
       using System.Linq;
       using System.Reflection;
       using System.Windows.Forms;

       namespace myTest.WinFormsApp
       {
           /// <summary>
           /// Sample form that loads a small HTML document into a WebBrowser control and
           /// edits list items located with XPath queries.
           /// </summary>
           public partial class MainForm : Form
           {
               public MainForm()
               {
                   InitializeComponent();
               }

               /// <summary>
               /// Loads the sample "Product Details" markup into the WebBrowser control.
               /// </summary>
               private void MainForm_Load(object sender, EventArgs e)
               {
                   // NOTE(review): the markup inside this literal appears to have lost its HTML
                   // tags when the post was extracted - confirm against the original before running.
                   webBrowser1.DocumentText = @"    

      Product Details

      • Paperback: 648 pages
      • Publisher: Wiley; Unlimited Edition edition (October 15, 2001)
      • Language: English
      • ISBN-10: 0764547763
      ";
               }

               /// <summary>
               /// Rewrites the innerHTML of the first list item, then of every following
               /// list item, using elements returned by XPath queries.
               /// </summary>
               private void cmdTest_Click(object sender, EventArgs e)
               {
                   var processor = new WebBrowserControlXPathQueriesProcessor(webBrowser1);

                   // change attributes of the first element of the list
                   // NOTE(review): both format strings below look like they originally wrapped
                   // {0} in different markup - confirm against the original post.
                   mshtml.IHTMLElement firstItem = processor.GetHtmlElement("//li");
                   if (firstItem != null)
                   {
                       firstItem.innerHTML = string.Format("{0}", firstItem.innerText);
                   }

                   // change attributes of the second and subsequent elements of the list
                   foreach (mshtml.IHTMLElement item in processor.GetHtmlElements("//ul//li").Skip(1))
                   {
                       // Entries that could not be cast to IHTMLElement come back as null.
                       if (item == null)
                       {
                           continue;
                       }

                       item.innerHTML = string.Format("{0}", item.innerText);
                   }
               }

               /// <summary>
               /// Enables IE WebBrowser control to evaluate XPath queries
               /// by injecting http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js
               /// and to return XPath queries results to the calling C# code as strongly typed
               /// mshtml.IHTMLElement and IEnumerable&lt;mshtml.IHTMLElement&gt;
               /// </summary>
               public class WebBrowserControlXPathQueriesProcessor
               {
                   private const string XPathLibraryUrl =
                       "http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js";

                   // Helper functions injected into the page. They take the XPath as a real
                   // argument, so the query never has to be spliced into script source and
                   // quotes inside the query cannot break the script.
                   private const string HelperScript = @"
                       function GetElement (XPath) {
                           return document.evaluate(
                               XPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                       };
                       function GetElements (XPath) {
                           var xPathRes = document.evaluate(
                               XPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
                           var nextElement = xPathRes.iterateNext();
                           var elements = new Object();
                           var elementIndex = 1;
                           while (nextElement) {
                               elements[elementIndex++] = nextElement;
                               nextElement = xPathRes.iterateNext();
                           }
                           elements.length = elementIndex - 1;
                           return elements;
                       };
                       ";

                   private readonly System.Windows.Forms.WebBrowser _webBrowser;

                   /// <summary>
                   /// Wraps the given browser control and injects the XPath library plus the
                   /// helper functions into its current document.
                   /// </summary>
                   /// <param name="webBrowser">Browser control whose document will be queried.</param>
                   /// <exception cref="ArgumentNullException"><paramref name="webBrowser"/> is null.</exception>
                   /// <exception cref="InvalidOperationException">
                   /// The control has no document, or the document has no head element.
                   /// </exception>
                   public WebBrowserControlXPathQueriesProcessor(System.Windows.Forms.WebBrowser webBrowser)
                   {
                       if (webBrowser == null)
                       {
                           throw new ArgumentNullException("webBrowser");
                       }

                       _webBrowser = webBrowser;
                       injectScripts();
                   }

                   /// <summary>
                   /// Appends two script elements to the document head: one referencing the
                   /// JavaScript-XPath library and one holding the helper functions.
                   /// </summary>
                   private void injectScripts()
                   {
                       // Thanks to: http://stackoverflow.com/questions/7998996/how-to-inject-javascript-in-webbrowser-control
                       HtmlDocument document = _webBrowser.Document;
                       if (document == null)
                       {
                           throw new InvalidOperationException("The WebBrowser control has no document loaded.");
                       }

                       HtmlElementCollection heads = document.GetElementsByTagName("head");
                       if (heads.Count == 0)
                       {
                           throw new InvalidOperationException("The loaded document has no head element.");
                       }

                       HtmlElement head = heads[0];

                       HtmlElement libraryEl = document.CreateElement("script");
                       ((mshtml.IHTMLScriptElement)libraryEl.DomElement).src = XPathLibraryUrl;
                       head.AppendChild(libraryEl);

                       HtmlElement helperEl = document.CreateElement("script");
                       ((mshtml.IHTMLScriptElement)helperEl.DomElement).text = HelperScript;
                       head.AppendChild(helperEl);
                   }

                   /// <summary>
                   /// Gets Html element's mshtml.IHTMLElement object instance using XPath query
                   /// </summary>
                   /// <param name="xPathQuery">XPath expression evaluated against the document.</param>
                   /// <returns>
                   /// The script result as an element, or null when it is not an mshtml.IHTMLElement.
                   /// </returns>
                   public mshtml.IHTMLElement GetHtmlElement(string xPathQuery)
                   {
                       return _webBrowser.Document.InvokeScript("GetElement", new object[] { xPathQuery })
                           as mshtml.IHTMLElement;
                   }

                   /// <summary>
                   /// Gets Html elements' IEnumerable&lt;mshtml.IHTMLElement&gt; object instance using XPath query
                   /// </summary>
                   /// <param name="xPathQuery">XPath expression evaluated against the document.</param>
                   /// <returns>
                   /// A lazily evaluated sequence of the matched elements; empty when the script
                   /// call returns nothing.
                   /// </returns>
                   public IEnumerable<mshtml.IHTMLElement> GetHtmlElements(string xPathQuery)
                   {
                       // Thanks to: http://stackoverflow.com/questions/5278275/accessing-properties-of-javascript-objects-using-type-dynamic-in-c-sharp-4
                       object comObject = _webBrowser.Document.InvokeScript("GetElements", new object[] { xPathQuery });
                       if (comObject == null)
                       {
                           // presumably the helper script is missing or failed - nothing to enumerate
                           yield break;
                       }

                       Type type = comObject.GetType();

                       // Convert rather than unbox: a direct (int) cast fails if the script
                       // number arrives boxed as any other numeric type.
                       int length = Convert.ToInt32(
                           type.InvokeMember("length", BindingFlags.GetProperty, null, comObject, null),
                           CultureInfo.InvariantCulture);

                       // The script object is filled with 1-based property names.
                       for (int i = 1; i <= length; i++)
                       {
                           yield return type.InvokeMember(
                               i.ToString(CultureInfo.InvariantCulture),
                               BindingFlags.GetProperty,
                               null,
                               comObject,
                               null) as mshtml.IHTMLElement;
                       }
                   }
               }
           }
       }

      以下是代码运行结果:

      代码运行结果

      我已在代码中以内联注释的形式注明了所引用内容的出处。如果你发现我遗漏了某些出处，请在评论中指出，我会补上。

      如果你知道更好的解决方案——更简短、更高效的代码——请发表评论和/或发布你的答案。