Java 如何获得这个隐藏字段?

Java 如何获得这个隐藏字段?,java,web-crawler,jsoup,Java,Web Crawler,Jsoup,我有一段HTML: <li class="hidden-xs-inline"> <a class='page-link' href="/acne-scars-treatment/answers?page=226" data-page="226"> </a> </li> 但它没有返回我想要的号码 有什么想法吗 所需的数据不在元素体中,因此“text()”不起作用。尝试: elements.attr("data-page"); 有关更多信息,请参

我有一段HTML:

<li class="hidden-xs-inline">
 <a class='page-link' href="/acne-scars-treatment/answers?page=226" data-page="226">
</a>
</li>
但我的代码没有返回我想要的页码(226)。


有什么想法吗?

所需的数据不在元素体中,因此“text()”不起作用。尝试:

elements.attr("data-page");
有关更多信息,请参阅 jsoup 文档中关于 attr() 方法的说明。

另一种做法是解析 href 中的查询参数,完整示例见下文:

import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;


/**
 add following dependencies to pom.xml

           <dependency>
              <groupId>org.jsoup</groupId>
               <artifactId>jsoup</artifactId>
               <version>1.10.2</version>
           </dependency>

           <dependency>
               <groupId>org.apache.httpcomponents</groupId>
               <artifactId>httpclient</artifactId>
               <version>4.5.3</version>
           </dependency>

 */
public class JsoupExample {

    /** Matches the pagination anchor nested anywhere inside a {@code hidden-xs-inline} element. */
    private static final String PAGE_LINK_SELECTOR = ".hidden-xs-inline .page-link";

    /**
     * Parses a hard-coded HTML fragment, locates the {@code page-link} anchor inside the
     * {@code hidden-xs-inline} item and prints every query parameter of its {@code href}
     * as {@code name : value}.
     *
     * <p>The anchor has no text content, so the page number is recovered from the
     * {@code href} query string rather than from the element body.
     *
     * @param args unused
     * @throws URISyntaxException if the anchor's {@code href} is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {

        String markup = "<li class=\"hidden-xs-inline\">\n" +
                " <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">\n" +
                "</a>\n" +
                "</li>";

        Document document = Jsoup.parse(markup);

        // select() yields an empty list when nothing matches; check it explicitly instead of
        // calling get(0), which would throw IndexOutOfBoundsException on a page without the link.
        Elements pageLinks = document.select(PAGE_LINK_SELECTOR);
        if (pageLinks.isEmpty()) {
            System.err.println("No element matches selector: " + PAGE_LINK_SELECTOR);
            return;
        }

        // attr() returns an empty string when the attribute is absent.
        String href = pageLinks.first().attr("href");
        if (href.isEmpty()) {
            System.err.println("Matched link has no href attribute");
            return;
        }

        List<NameValuePair> params = URLEncodedUtils.parse(new URI(href), Charset.forName("UTF-8"));

        for (NameValuePair param : params) {
            System.out.println(param.getName() + " : " + param.getValue());  //page : 226
        }
    }
}
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;
/**
将以下依赖项添加到pom.xml
org.jsoup
\n“+
“”;
文档html=Jsoup.parse(HTMLSTring);
Elements hiddenElements=html.body().getElementsByClass(“隐藏的xs内联”);
Elements pageLinkElements=hiddenElements.get(0.getElementsByClass(“页面链接”);
String href=pageLinkElements.get(0.attr(“href”);
List params=URLEncodedUtils.parse(新URI(href)、Charset.forName(“UTF-8”);
for(NameValuePair参数:params){
System.out.println(param.getName()+”:“+param.getValue());//第226页
}
}
}

它不起作用,运行时抛出了以下异常:Exception in thread "main" java.lang.IndexOutOfBoundsException: Index: 0, Size: 0。另外,如何在 NetBeans 中添加 pom.xml?

在 NetBeans 中,如果项目不是 Maven 项目,您必须手动将 jar 文件添加到类路径。jar 文件可以从 jsoup 和 Apache HttpComponents 的官方下载页面获取。
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;


/**
 add following dependencies to pom.xml

           <dependency>
              <groupId>org.jsoup</groupId>
               <artifactId>jsoup</artifactId>
               <version>1.10.2</version>
           </dependency>

           <dependency>
               <groupId>org.apache.httpcomponents</groupId>
               <artifactId>httpclient</artifactId>
               <version>4.5.3</version>
           </dependency>

 */
public class JsoupExample {

    /** Matches the pagination anchor nested anywhere inside a {@code hidden-xs-inline} element. */
    private static final String PAGE_LINK_SELECTOR = ".hidden-xs-inline .page-link";

    /**
     * Parses a hard-coded HTML fragment, locates the {@code page-link} anchor inside the
     * {@code hidden-xs-inline} item and prints every query parameter of its {@code href}
     * as {@code name : value}.
     *
     * <p>The anchor has no text content, so the page number is recovered from the
     * {@code href} query string rather than from the element body.
     *
     * @param args unused
     * @throws URISyntaxException if the anchor's {@code href} is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {

        String markup = "<li class=\"hidden-xs-inline\">\n" +
                " <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">\n" +
                "</a>\n" +
                "</li>";

        Document document = Jsoup.parse(markup);

        // select() yields an empty list when nothing matches; check it explicitly instead of
        // calling get(0), which would throw IndexOutOfBoundsException on a page without the link.
        Elements pageLinks = document.select(PAGE_LINK_SELECTOR);
        if (pageLinks.isEmpty()) {
            System.err.println("No element matches selector: " + PAGE_LINK_SELECTOR);
            return;
        }

        // attr() returns an empty string when the attribute is absent.
        String href = pageLinks.first().attr("href");
        if (href.isEmpty()) {
            System.err.println("Matched link has no href attribute");
            return;
        }

        List<NameValuePair> params = URLEncodedUtils.parse(new URI(href), Charset.forName("UTF-8"));

        for (NameValuePair param : params) {
            System.out.println(param.getName() + " : " + param.getValue());  //page : 226
        }
    }
}