Javascript HtmlUnit:尝试获取部分字段由脚本加载的网页的 HTML 时出现大量异常日志

Javascript HtmlUnit:尝试获取部分字段由脚本加载的网页的 HTML 时出现大量异常日志,javascript,java,web-scraping,htmlunit,Javascript,Java,Web Scraping,Htmlunit,我正在尝试读取网页的html。我认为有些字段是由脚本填充的,因为我可以在浏览器中清楚地看到它们,但在抓取网页时,它们的内容是空的。 当我尝试在下面的代码中使用HtmlUnit时,我在日志中遇到了一个很大的异常,我不知道如何修复它 这是我的代码: import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftwar

我正在尝试读取网页的html。我认为有些字段是由脚本填充的,因为我可以在浏览器中清楚地看到它们,但在抓取网页时,它们的内容是空的。 当我尝试在下面的代码中使用HtmlUnit时,我在日志中遇到了一个很大的异常,我不知道如何修复它

这是我的代码:

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Minimal HtmlUnit example: loads a betting page with JavaScript enabled and prints
 * every {@code tr.rowOdd} table row of the resulting DOM as XML.
 */
public class HtmlUnitTest {

    /** Page to load. Used as-is; nothing is appended to it. */
    private static final String PAGE_URL =
            "http://sports.williamhill.es/bet_esp/es/betting/t/338/LaLiga.html";

    /** Time window, in milliseconds, passed to the background-JavaScript wait. */
    private static final long JS_WAIT_MILLIS = 10000;

    /**
     * Strong reference to the HtmlUnit logger. java.util.logging only holds loggers
     * weakly, so without this field the OFF level set below could be lost if the
     * logger were garbage-collected.
     */
    private static final java.util.logging.Logger HTMLUNIT_LOGGER =
            java.util.logging.Logger.getLogger("com.gargoylesoftware");

    /**
     * Loads {@link #PAGE_URL}, waits for background JavaScript, and prints the matching rows.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        /* turn off annoying htmlunit warnings */
        HTMLUNIT_LOGGER.setLevel(java.util.logging.Level.OFF);

        // try-with-resources closes the client (and its windows/threads) on every path.
        try (WebClient client = new WebClient(BrowserVersion.CHROME)) {
            client.getOptions().setCssEnabled(false);
            client.getOptions().setJavaScriptEnabled(true);
            // Errors inside the site's own scripts (e.g. the TypeError raised by
            // main_end.js) must not abort page loading; keep going and scrape the DOM.
            client.getOptions().setThrowExceptionOnScriptError(false);

            // The original code appended a URL-encoded search term directly after
            // ".html", which produced the invalid address "...LaLiga.htmlWilliam+Hill".
            HtmlPage page = client.getPage(PAGE_URL);
            client.waitForBackgroundJavaScriptStartingBefore(JS_WAIT_MILLIS);

            final DomNodeList<DomNode> rows = page.querySelectorAll("tr.rowOdd");

            for (DomNode row : rows) {
                System.out.println(row.asXml());
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report and exit normally.
            e.printStackTrace();
        }
    }
}
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class HtmlUnitTest {
    public static void main(String[] args) {
        /* turn off annoying htmlunit warnings */
        java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);
        String searchQuery = "William Hill";
        WebClient client = new WebClient(BrowserVersion.CHROME);
        client.getOptions().setCssEnabled(false);
        client.getOptions().setJavaScriptEnabled(true);
        try {
            String searchUrl = "http://sports.williamhill.es/bet_esp/es/betting/t/338/LaLiga.html" + URLEncoder.encode(searchQuery, "UTF-8");
            HtmlPage page = client.getPage(searchUrl);
            client.waitForBackgroundJavaScriptStartingBefore(10000);
            final DomNodeList<DomNode> divs = page.querySelectorAll("tr.rowOdd");
            for (DomNode div : divs) {
                System.out.println(div.asXml());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
以下是我得到的异常:

======= EXCEPTION START ========
EcmaError: lineNumber=[471] column=[0] lineSource=[<no source>] name=[TypeError] sourceName=[http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1] message=[TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)]
com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:914)
    at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:599)
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:527)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:794)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:770)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:761)
    at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:919)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:316)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:396)
    at com.gargoylesoftware.htmlunit.html.HtmlScript$2.execute(HtmlScript.java:246)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:267)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:805)
    at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:761)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1236)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1136)
    at net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter.endElement(DefaultFilter.java:226)
    at net.sourceforge.htmlunit.cyberneko.filters.NamespaceBinder.endElement(NamespaceBinder.java:345)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3189)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2141)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner.scanDocument(HTMLScanner.java:945)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:521)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:472)
    at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1004)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:253)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:195)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:267)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:158)
    at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:529)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:398)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:315)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:463)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:448)
    at HtmlUnitTest.main(HtmlUnitTest.java:25)
Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4130)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4108)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:4141)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:4160)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefCallError(ScriptRuntime.java:4179)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThisHelper(ScriptRuntime.java:2509)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThis(ScriptRuntime.java:2502)
    at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1327)
    at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:815)
    at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:111)
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:417)
    at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:325)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3424)
    at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:122)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:785)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:899)
    ... 34 more
Enclosed exception: 
net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4130)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4108)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:4141)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:4160)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefCallError(ScriptRuntime.java:4179)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThisHelper(ScriptRuntime.java:2509)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThis(ScriptRuntime.java:2502)
    at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1327)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:471)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:377)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:379)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:397)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:415)
    at script(http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1:418)
    at script(script in http://sports.williamhill.es/bet_esp/es/betting/t/338/LaLiga.htmlWilliam+Hill from (1174, 34) to (1192, 12):1176)
    at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:815)
    at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:111)
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:417)
    at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:325)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3424)
    at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:122)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:785)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:899)
    at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:599)
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:527)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:794)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:770)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:761)
    at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:919)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:316)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:396)
    at com.gargoylesoftware.htmlunit.html.HtmlScript$2.execute(HtmlScript.java:246)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:267)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:805)
    at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:761)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1236)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1136)
    at net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter.endElement(DefaultFilter.java:226)
    at net.sourceforge.htmlunit.cyberneko.filters.NamespaceBinder.endElement(NamespaceBinder.java:345)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3189)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2141)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner.scanDocument(HTMLScanner.java:945)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:521)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:472)
    at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1004)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:253)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:195)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:267)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:158)
    at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:529)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:398)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:315)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:463)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:448)
    at HtmlUnitTest.main(HtmlUnitTest.java:25)
======= EXCEPTION END ========
======= EXCEPTION START ========
EcmaError: lineNumber=[471] column=[0] lineSource=[<no source>] name=[TypeError] sourceName=[http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1] message=[TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)]
com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:914)
    at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:599)
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:527)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:794)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:770)
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:761)
    at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:919)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:316)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:396)
    at com.gargoylesoftware.htmlunit.html.HtmlScript$2.execute(HtmlScript.java:246)
    at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:267)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:805)
    at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:761)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1236)
    at net.sourceforge.htmlunit.cyberneko.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1136)
    at net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter.endElement(DefaultFilter.java:226)
    at net.sourceforge.htmlunit.cyberneko.filters.NamespaceBinder.endElement(NamespaceBinder.java:345)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3189)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2141)
    at net.sourceforge.htmlunit.cyberneko.HTMLScanner.scanDocument(HTMLScanner.java:945)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:521)
    at net.sourceforge.htmlunit.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:472)
    at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1004)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:253)
    at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:195)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:267)
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:158)
    at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:529)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:398)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:315)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:463)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:448)
    at HtmlUnitTest.main(HtmlUnitTest.java:25)
Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot call method "replace" of undefined (http://trans.staticcache.org/ob/static/cust/js/minified/main_end.js?ver=f4b42247e3c3fbf04e68fa1715088db1#471)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4130)
    at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:4108)
在net.sourceforge.h