如何在JSoupJava中仅显示html标记？_Java_Html_Eclipse_Parsing_Jsoup

如何在 Java 中使用 Jsoup 仅显示 HTML 标签？

java html eclipse parsing

如何在JSoupJava中仅显示html标记？,java,html,eclipse,parsing,jsoup,Java,Html,Eclipse,Parsing,Jsoup,我正在做一个学校的项目，我正试图解析一个html网页来显示标记，就像下面的输出一样，没有结束标记。（我手工编码） } 这是我到目前为止的几行输出 package Scrapper; import java.util.LinkedList; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Node; import org.jsoup.select.NodeVisitor; clas

我正在做一个学校项目，想解析一个 HTML 网页并只显示其中的标签（不含结束标签），效果类似下面的输出。（该输出是我手工编写的）

}

这是我到目前为止的几行输出

package Scrapper;

import java.util.LinkedList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeVisitor;

class TagVisitor implements NodeVisitor {

    public static class TagInfo {

        public String name;
        public int depth;

        TagInfo(String name, int depth) {
            this.depth = depth;
            this.name = name;
        }
    }

    private LinkedList<TagInfo> tags = new LinkedList<>();

    public void head(Node node, int depth) {
        String tag = node.nodeName();
        if(!tag.startsWith("#")) {
            tags.add(new TagInfo('<'+node.nodeName()+'>', depth)); 
        }
    }

    public void tail(Node node, int depth) {
        //Do nothing
    }

    public LinkedList<TagInfo> getTags() {
        return tags;
    }

    public void printTree() {
        for(TagInfo info : tags) {
            String indentation = new String(new char[info.depth*2]).replace('\0', ' ');
            System.out.println(indentation + info.name);
        }
    }
}

/**
 * Entry point: downloads a sample page, walks its node tree with a {@link TagVisitor}
 * and prints the tags the visitor collected.
 */
public class MainJsoup {

    /**
     * Fetches the page, traverses it and prints the tag outline to standard output.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        // To parse a local file instead of fetching over the network:
        //   Jsoup.parse(new FileInputStream("test.html"), "UTF-8", "")
        final String pageAddress =
                "http://csb.stanford.edu/class/public/pages/sykes_webdesign/05_simple.html";
        final String browserId =
                "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201";
        final int timeoutMillis = 2000;

        Document page = Jsoup.connect(pageAddress)
                .userAgent(browserId)
                .timeout(timeoutMillis)
                .get();

        TagVisitor collector = new TagVisitor();
        page.traverse(collector);
        collector.printTree();
    }
}

包装刮板机；
导入java.util.LinkedList；
导入org.jsoup.jsoup；
导入org.jsoup.nodes.Document；
导入org.jsoup.nodes.Node；
导入org.jsoup.select.NodeVisitor；
类TagVisitor实现NodeVisitor{
公共静态类TagInfo{
公共字符串名称；
公众参与深度；
TagInfo（字符串名称，整数深度）{
这个。深度=深度；
this.name=名称；
}
}
私有LinkedList标记=新LinkedList（）；
公共空心封头（节点，内部深度）{
String tag=node.nodeName（）；
如果（！tag.startsWith（“#”）{
添加（新标记信息（“”，深度））；
}
}
公共空尾（节点，整数深度）{
//无所事事
}
公共链接列表getTags（）{
返回标签；
}
公共void printree（）{
用于（标记信息：标记）{
字符串缩进=新字符串（新字符[info.depth*2]）。替换（'\0'，''；
System.out.println（缩进+信息名）；
}
}
}
公共类MainJsoup{
公共静态void main（字符串[]args）引发异常{
//InputStream=新文件InputStream（“test.html”）；
//documentdoc=Jsoup.parse（流，“UTF-8”和“”）；
字符串URL=”http://csb.stanford.edu/class/public/pages/sykes_webdesign/05_simple.html";
文档doc=Jsoup
.connect（URL）
.userAgent（“Mozilla/5.0（Windows；U；Windows NT 6.1；rv:2.2）Gecko/20110201”）
.超时（2000）
.get（）；
TagVisitor=新的TagVisitor（）；
特拉弗斯博士（访客）；
visitor.printree（）；
}
}

问题是什么？如何在 Java 中用 Jsoup 只显示 HTML 标签？没问题，祝你玩得开心！

public class ReadWithScanner {
public static void main(String[] args) throws IOException 
{
    String URL ="http://csb.stanford.edu/class/public/pages/sykes_webdesign/05_simple.html";
    Document doc = Jsoup.connect(URL).get();        
    //Element p = doc.select("p");
    //Elements p = doc.getElementsByTag("h6");
    Elements p = doc.select("html");
    //System.out.println(p);

     DoublyLinkedList theList = new DoublyLinkedList();

      theList.insert(p);      // insert at front

      theList.displayTree();
}

package Scrapper;

import java.util.LinkedList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeVisitor;

class TagVisitor implements NodeVisitor {

    public static class TagInfo {

        public String name;
        public int depth;

        TagInfo(String name, int depth) {
            this.depth = depth;
            this.name = name;
        }
    }

    private LinkedList<TagInfo> tags = new LinkedList<>();

    public void head(Node node, int depth) {
        String tag = node.nodeName();
        if(!tag.startsWith("#")) {
            tags.add(new TagInfo('<'+node.nodeName()+'>', depth)); 
        }
    }

    public void tail(Node node, int depth) {
        //Do nothing
    }

    public LinkedList<TagInfo> getTags() {
        return tags;
    }

    public void printTree() {
        for(TagInfo info : tags) {
            String indentation = new String(new char[info.depth*2]).replace('\0', ' ');
            System.out.println(indentation + info.name);
        }
    }
}

/**
 * Entry point: downloads a sample page, walks its node tree with a {@link TagVisitor}
 * and prints the tags the visitor collected.
 */
public class MainJsoup {

    /**
     * Fetches the page, traverses it and prints the tag outline to standard output.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        // To parse a local file instead of fetching over the network:
        //   Jsoup.parse(new FileInputStream("test.html"), "UTF-8", "")
        final String pageAddress =
                "http://csb.stanford.edu/class/public/pages/sykes_webdesign/05_simple.html";
        final String browserId =
                "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201";
        final int timeoutMillis = 2000;

        Document page = Jsoup.connect(pageAddress)
                .userAgent(browserId)
                .timeout(timeoutMillis)
                .get();

        TagVisitor collector = new TagVisitor();
        page.traverse(collector);
        collector.printTree();
    }
}