Java 如何获得这个隐藏字段?
我有一段HTML:Java 如何获得这个隐藏字段?,java,web-crawler,jsoup,Java,Web Crawler,Jsoup,我有一段HTML: <li class="hidden-xs-inline"> <a class='page-link' href="/acne-scars-treatment/answers?page=226" data-page="226"> </a> </li> 但它没有返回我想要的号码 有什么想法吗 所需的数据不在元素体中,因此“text()”不起作用。尝试: elements.attr("data-page"); 有关更多信息,请参
<li class="hidden-xs-inline">
<a class='page-link' href="/acne-scars-treatment/answers?page=226" data-page="226">
</a>
</li>
但它没有返回我想要的页码(226)。
有什么想法吗?
回答:所需的数据不在元素体中,而是保存在 data-page 属性里,因此“text()”不起作用。尝试:
elements.attr("data-page");
有关更多信息,请参阅 jsoup 文档中关于 attr(String) 方法的说明。
另一个回答(通过解析 href 中的查询参数来获取页码),完整示例见下文:
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;
/**
add following dependencies to pom.xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
*/
/**
 * Demonstrates how to read the page number of a pagination link with jsoup.
 *
 * <p>The number is not part of the anchor's text; it is only present in the
 * {@code href} query string (and the {@code data-page} attribute), so this
 * example parses the query parameters of the {@code href}.
 */
public class JsoupExample {

    /**
     * Parses a hard-coded HTML fragment, locates the first {@code page-link}
     * element inside the first {@code hidden-xs-inline} element and prints every
     * query parameter of its {@code href} as {@code name : value}.
     *
     * <p>If either element is missing, a message is written to standard error and
     * the method returns instead of failing with an
     * {@link IndexOutOfBoundsException}.
     *
     * @param args command-line arguments (unused)
     * @throws URISyntaxException if the {@code href} value is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {
        String htmlString = "<li class=\"hidden-xs-inline\">\n" +
                " <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">\n" +
                "</a>\n" +
                "</li>";
        Document html = Jsoup.parse(htmlString);

        Elements hiddenElements = html.body().getElementsByClass("hidden-xs-inline");
        if (hiddenElements.isEmpty()) {
            // Guard: get(0) on an empty result throws IndexOutOfBoundsException.
            System.err.println("No element with class 'hidden-xs-inline' found");
            return;
        }

        Elements pageLinkElements = hiddenElements.get(0).getElementsByClass("page-link");
        if (pageLinkElements.isEmpty()) {
            System.err.println("No element with class 'page-link' found inside 'hidden-xs-inline'");
            return;
        }

        String href = pageLinkElements.get(0).attr("href");
        List<NameValuePair> params = URLEncodedUtils.parse(new URI(href), Charset.forName("UTF-8"));
        for (NameValuePair param : params) {
            System.out.println(param.getName() + " : " + param.getValue()); //page : 226
        }
    }
}
import org.apache.http.NameValuePair;
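import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;
/**
add following dependencies to pom.xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
*/
public class JsoupExample {
public static void main(String[] args) throws URISyntaxException {
String HTMLSTring = "<li class=\"hidden-xs-inline\">\n" +
" <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">\n" +
"</a>\n" +
"</li>";
Document html = Jsoup.parse(HTMLSTring);
Elements hiddenElements = html.body().getElementsByClass("hidden-xs-inline");
Elements pageLinkElements = hiddenElements.get(0).getElementsByClass("page-link");
String href = pageLinkElements.get(0).attr("href");
List<NameValuePair> params = URLEncodedUtils.parse(new URI(href), Charset.forName("UTF-8"));
for (NameValuePair param : params) {
System.out.println(param.getName() + " : " + param.getValue()); //page : 226
}
}
}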
评论:它不起作用,并给了我以下错误:Exception in thread "main" java.lang.IndexOutOfBoundsException: Index: 0, Size: 0。另外,如何在 NetBeans 中添加 pom.xml?
回复:在 NetBeans 中,您必须将 jar 文件添加到类路径。jar 文件可以从 jsoup 和 Apache HttpComponents 的官方下载页面获取(原文中的链接已丢失)。
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;
/**
add following dependencies to pom.xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
*/
/**
 * Demonstrates how to read the page number of a pagination link with jsoup.
 *
 * <p>The number is not part of the anchor's text; it is only present in the
 * {@code href} query string (and the {@code data-page} attribute), so this
 * example parses the query parameters of the {@code href}.
 */
public class JsoupExample {

    /**
     * Parses a hard-coded HTML fragment, locates the first {@code page-link}
     * element inside the first {@code hidden-xs-inline} element and prints every
     * query parameter of its {@code href} as {@code name : value}.
     *
     * <p>If either element is missing, a message is written to standard error and
     * the method returns instead of failing with an
     * {@link IndexOutOfBoundsException}.
     *
     * @param args command-line arguments (unused)
     * @throws URISyntaxException if the {@code href} value is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {
        String htmlString = "<li class=\"hidden-xs-inline\">\n" +
                " <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">\n" +
                "</a>\n" +
                "</li>";
        Document html = Jsoup.parse(htmlString);

        Elements hiddenElements = html.body().getElementsByClass("hidden-xs-inline");
        if (hiddenElements.isEmpty()) {
            // Guard: get(0) on an empty result throws IndexOutOfBoundsException.
            System.err.println("No element with class 'hidden-xs-inline' found");
            return;
        }

        Elements pageLinkElements = hiddenElements.get(0).getElementsByClass("page-link");
        if (pageLinkElements.isEmpty()) {
            System.err.println("No element with class 'page-link' found inside 'hidden-xs-inline'");
            return;
        }

        String href = pageLinkElements.get(0).attr("href");
        List<NameValuePair> params = URLEncodedUtils.parse(new URI(href), Charset.forName("UTF-8"));
        for (NameValuePair param : params) {
            System.out.println(param.getName() + " : " + param.getValue()); //page : 226
        }
    }
}