Java Jsoup:提取2个随机标记之间的所有内容
我得到了HTML文件:Java Jsoup:提取2个随机标记之间的所有内容,java,html,jsoup,Java,Html,Jsoup,我得到了HTML文件: <div>test</div> abc <content > <!--alo 123--> <div>content alo 123 here</div> </content> yes <div>test</div> 我尝试在上一个标记后添加: previousTag.append("<gf>"); nextTag.before("</
<div>test</div>
abc
<content >
<!--alo 123-->
<div>content alo 123 here</div>
</content>
yes
<div>test</div>
我尝试在上一个标记后添加:
previousTag.append("<gf>");
nextTag.before("</gf>");
int iPrevious=previousTag.elementSiblingIndex();
Elements selection=previousTag.getElementsByIndexGreaterThan(iPrevious);
我想以此获取“previousTag”和“nextTag”之间的所有内容,但最终没有起作用。
我看了另一篇有同样问题的文章,但无法应用。
他们知道自己的标签是什么。
我避免使用循环来处理 list_sibling_nodes(同级节点列表)。
有人知道如何提取两个随机标记之间的所有内容吗?有两种方案可以解决这个问题。选项1:CSS查询方法。Jsoup会帮你处理所有的脏活。只有当您对同级
TextNodes
不感兴趣时,这种方法才有效
div:containsOwn(test):first-of-type ~ *:not(div:containsOwn(test), div:containsOwn(test):last-of-type ~ *)
请记住:由于*
运算符只匹配元素,因此不会匹配文本节点
说明:
选项2:API方法
下面的代码手动检查两个选定元素(两个随机标记)之间的每个同级节点。当找到第二个随机元素时,它停止
// Selectors identifying the two marker elements that delimit the region to extract.
String firstRandomElementSelector = "div:containsOwn(test):first-of-type";
String secondRandomElementSelector = "div:containsOwn(test):last-of-type";
Document doc = ...;
Element firstRandomElement = select(doc, firstRandomElementSelector);
Element secondRandomElement = select(doc, secondRandomElementSelector);

// Walk the siblings that follow the first marker and collect every node
// (elements, text nodes, comments) until the second marker is reached or
// the sibling chain ends.
List<Node> nodesInBetween = new ArrayList<>();
for (Node sibling = firstRandomElement.nextSibling();
        sibling != null && !secondRandomElement.equals(sibling);
        sibling = sibling.nextSibling()) {
    nodesInBetween.add(sibling);
}

for (Node node : nodesInBetween) {
    System.out.println(node.outerHtml() + "---");
}
// Helper method
/**
 * Returns the first element of {@code doc} that matches {@code cssSelector}.
 *
 * @param doc the document to query
 * @param cssSelector the CSS query passed to {@code doc.select}
 * @return the first matching element; never {@code null}
 * @throws RuntimeException if no element matches the selector
 */
private static Element select(Document doc, String cssSelector) {
    Element element = doc.select(cssSelector).first();
    if (element == null) {
        throw new RuntimeException("Unable to locate any element matching:" + cssSelector);
    }
    // The original snippet fell off the end of the method without returning the match.
    return element;
}
String firstRandomElementSelector=“div:containsOwn(测试):类型的第一个”;
String secondRandomElementSelector=“div:containsOwn(测试):类型的最后一个”;
文件文件=。。。;
元素firstRandomElement=选择(文档,firstRandomElementSelector);
Element secondRandomElement=选择(单据,secondRandomElement选择器);
List siblingNodes=firstRandomElement.siblingNodes();
List nodesInBetween=new ArrayList();
节点currentNode=firstRandomElement;
做{
Node nextSibling=currentNode.nextSibling();
if(nextSibling==null){
打破
}
if(secondRandomElement.equals(nextSibling)){
打破
}
nodesinbeween.add(nextSibling);
currentNode=nextSibling;
}虽然(正确);
用于(节点:节点间){
System.out.println(node.outerHtml()+“--”);
}
//辅助方法
私有静态元素选择(文档文档、字符串CSS选择器){
Element=doc.select(cssSelector.first();
if(元素==null){
抛出新的RuntimeException(“无法找到任何匹配的元素:“+cssSelector”);
}
}
div:containsOwn(test):first-of-type ~ *:not(div:containsOwn(test), div:containsOwn(test):last-of-type ~ *)
div:containsOwn(test) /* Select a div containing directly the text `test` */
:first-of-type /* Keep only the first div found (1) */
~ * /* Select all siblings of (1) ... */
:not( /* ... excluding ... */
div:containsOwn(test) /* ... any div containing directly the text `test` */
, /* OR */
div:containsOwn(test):last-of-type ~ * /* any sibling after the second div (second random tag) */
) /* :not operator closing parenthesis */
// Selectors identifying the two marker elements that delimit the region to extract.
String firstRandomElementSelector = "div:containsOwn(test):first-of-type";
String secondRandomElementSelector = "div:containsOwn(test):last-of-type";
Document doc = ...;
Element firstRandomElement = select(doc, firstRandomElementSelector);
Element secondRandomElement = select(doc, secondRandomElementSelector);

// Walk the siblings that follow the first marker and collect every node
// (elements, text nodes, comments) until the second marker is reached or
// the sibling chain ends.
List<Node> nodesInBetween = new ArrayList<>();
for (Node sibling = firstRandomElement.nextSibling();
        sibling != null && !secondRandomElement.equals(sibling);
        sibling = sibling.nextSibling()) {
    nodesInBetween.add(sibling);
}

for (Node node : nodesInBetween) {
    System.out.println(node.outerHtml() + "---");
}
// Helper method
/**
 * Returns the first element of {@code doc} that matches {@code cssSelector}.
 *
 * @param doc the document to query
 * @param cssSelector the CSS query passed to {@code doc.select}
 * @return the first matching element; never {@code null}
 * @throws RuntimeException if no element matches the selector
 */
private static Element select(Document doc, String cssSelector) {
    Element element = doc.select(cssSelector).first();
    if (element == null) {
        throw new RuntimeException("Unable to locate any element matching:" + cssSelector);
    }
    // The original snippet fell off the end of the method without returning the match.
    return element;
}