Java 在androidstudio中获取下一个同名类jsoup的元素

Java 在androidstudio中获取下一个同名类jsoup的元素,java,android,html,jsoup,Java,Android,Html,Jsoup,我想得到下一个在html中具有相同名称类的元素。html标记类似于: html: <section class="post"> <img class="pecintakomik" src="/images/top/op.jpg" alt="pecintakomik.com" /> <div class="post-cnt"> <h2>Manga bla bla</h2

我想得到下一个在html中具有相同名称类的元素。html标记类似于:

html:

  <section class="post">
        <img class="pecintakomik" src="/images/top/op.jpg" alt="pecintakomik.com" />
            <div class="post-cnt">
                <h2>Manga bla bla</h2>
                    <ul>
                    <li><strong>Nama Alternatif:</strong> </li>
                    <li><strong>Tahun Rilis:</strong> 2010</li>
                    <li><strong>Author(s):</strong> sensei1
                    <li><strong>Artist(s):</strong> sense2</li>
                    <li><strong>Genre:</strong> Action</li>
                    <li><strong>Sinopsis:</strong> bla bla bla </li>
                    <li><span class='st_facebook_hcount' displayText='Facebook'></span> <span class='st_twitter_hcount' displayText='Tweet'></span> <span class='st_sharethis_hcount' displayText='ShareThis'></span></li>                      
                    </ul>
            </div>
                <div class="clear">&nbsp;</div>
    </section>
    <img src="http://www.pecintakomik.com/images/block.png">
    <section class="post">
        <div class="post-cnt">
            <h2>List Chapter(s)</h2>
            <ul>
                <li><a href="/manga/bla_bla/816"> bla bl 816 <img src="/images/new.gif"><em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/815"> bla bla 815<em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/814"> bla bla 814<em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/813"> bla bla 813<em>Baca Online </em></a></li>
            </ul>
       </div>
    </section>
private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {
    int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
    int endIndex = unparsedHtml.indexOf("</div>", beginIndex);

    String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);

    Document parsedDocument = Jsoup.parse(trimmedHtml);


    List<Chapter> chapterList = scrapeChaptersFromParsedDocument(parsedDocument);
    chapterList = setSourceForChapterList(chapterList);
    chapterList = setParentUrlForChapterList(chapterList, request.getUrl());
    chapterList = setNumberForChapterList(chapterList);

    saveChaptersToDatabase(chapterList, request.getUrl());

    return chapterList;
}

private List<Chapter> scrapeChaptersFromParsedDocument(Document parsedDocument) {
    List<Chapter> chapterList = new ArrayList<Chapter>();

    Element chapterElementnya = parsedDocument.select("div.post-cnt").get(1);
    Elements chapterElements = chapterElementnya.getElementsByTag("li");


    for (Element chapterElement : chapterElements) {
        Chapter currentChapter = constructChapterFromHtmlBlock(chapterElement);

        chapterList.add(currentChapter);
    }

    return chapterList;
}

private Chapter constructChapterFromHtmlBlock(Element chapterElement) {
    Chapter newChapter = DefaultFactory.Chapter.constructDefault();

    Element urlElement = chapterElement.select("a").first();
    Element nameElement = chapterElement.select("a").first();

    if (urlElement != null) {
        String fieldUrl = "http://www.pecintakomik.com" + urlElement.attr("href");
        newChapter.setUrl(fieldUrl);
    }
    if (nameElement != null) {
        String fieldName = nameElement.text();
        newChapter.setName(fieldName);
    }

    boolean fieldNew = chapterElement.html().contains("<img src=\"/images/new.gif\">");
    newChapter.setNew(fieldNew);

    return newChapter;
}

漫画家布拉布拉
  • Nama备选方案:
  • 塔贡瑞利斯:2010
  • 作者:sense1
  • 艺术家:感官2
  • 类型:动作
  • 中国人参:bla bla bla bla
列表章节
我的代码是获取列表漫画的href链接(并将其存储在sqllite上),但我无法获取:

java代码:

  <section class="post">
        <img class="pecintakomik" src="/images/top/op.jpg" alt="pecintakomik.com" />
            <div class="post-cnt">
                <h2>Manga bla bla</h2>
                    <ul>
                    <li><strong>Nama Alternatif:</strong> </li>
                    <li><strong>Tahun Rilis:</strong> 2010</li>
                    <li><strong>Author(s):</strong> sensei1
                    <li><strong>Artist(s):</strong> sense2</li>
                    <li><strong>Genre:</strong> Action</li>
                    <li><strong>Sinopsis:</strong> bla bla bla </li>
                    <li><span class='st_facebook_hcount' displayText='Facebook'></span> <span class='st_twitter_hcount' displayText='Tweet'></span> <span class='st_sharethis_hcount' displayText='ShareThis'></span></li>                      
                    </ul>
            </div>
                <div class="clear">&nbsp;</div>
    </section>
    <img src="http://www.pecintakomik.com/images/block.png">
    <section class="post">
        <div class="post-cnt">
            <h2>List Chapter(s)</h2>
            <ul>
                <li><a href="/manga/bla_bla/816"> bla bl 816 <img src="/images/new.gif"><em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/815"> bla bla 815<em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/814"> bla bla 814<em>Baca Online </em></a></li>
                <li><a href="/manga/bla_bla/813"> bla bla 813<em>Baca Online </em></a></li>
            </ul>
       </div>
    </section>
private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {
    int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
    int endIndex = unparsedHtml.indexOf("</div>", beginIndex);

    String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);

    Document parsedDocument = Jsoup.parse(trimmedHtml);


    List<Chapter> chapterList = scrapeChaptersFromParsedDocument(parsedDocument);
    chapterList = setSourceForChapterList(chapterList);
    chapterList = setParentUrlForChapterList(chapterList, request.getUrl());
    chapterList = setNumberForChapterList(chapterList);

    saveChaptersToDatabase(chapterList, request.getUrl());

    return chapterList;
}

private List<Chapter> scrapeChaptersFromParsedDocument(Document parsedDocument) {
    List<Chapter> chapterList = new ArrayList<Chapter>();

    Element chapterElementnya = parsedDocument.select("div.post-cnt").get(1);
    Elements chapterElements = chapterElementnya.getElementsByTag("li");


    for (Element chapterElement : chapterElements) {
        Chapter currentChapter = constructChapterFromHtmlBlock(chapterElement);

        chapterList.add(currentChapter);
    }

    return chapterList;
}

private Chapter constructChapterFromHtmlBlock(Element chapterElement) {
    Chapter newChapter = DefaultFactory.Chapter.constructDefault();

    Element urlElement = chapterElement.select("a").first();
    Element nameElement = chapterElement.select("a").first();

    if (urlElement != null) {
        String fieldUrl = "http://www.pecintakomik.com" + urlElement.attr("href");
        newChapter.setUrl(fieldUrl);
    }
    if (nameElement != null) {
        String fieldName = nameElement.text();
        newChapter.setName(fieldName);
    }

    boolean fieldNew = chapterElement.html().contains("<img src=\"/images/new.gif\">");
    newChapter.setNew(fieldNew);

    return newChapter;
}
private List parsehtmlotchapters(RequestWrapper请求,字符串unparsedHtml){
int beginIndex=unparsedHtml.indexOf(“”);
int-endIndex=unparsedHtml.indexOf(“,beginIndex);
String trimmedHtml=unparsedHtml.substring(beginIndex,endIndex);
Document parsedDocument=Jsoup.parse(trimmedHtml);
List chapterList=来自parsedDocument(parsedDocument)的scrapeChaptersFromParsedDocument;
chapterList=设置资源检索表(chapterList);
chapterList=setParentUrlForChapterList(chapterList,request.getUrl());
chapterList=设置编号或chapterList(chapterList);
saveChaptersToDatabase(chapterList,request.getUrl());
返回章节列表;
}
来自parsedDocument的私有列表章节(Document parsedDocument){
List chapterList=new ArrayList();
元素chapterElementnya=parsedDocument.select(“div.post-cnt”).get(1);
Elements chapterements=chapterementnya.getElementsByTag(“li”);
对于(元素chapterElement:chapterElements){
Chapter currentChapter=来自HTMLBlock的构造Chapter(chapterElement);
章节列表。添加(当前章节);
}
返回章节列表;
}
私有章构造函数chapterFromHtmlBlock(元素chapterElement){
Chapter newChapter=DefaultFactory.Chapter.constructDefault();
元素urlement=chapterement.select(“a”).first();
元素名称Element=chapterement.select(“a”).first();
if(urlement!=null){
字符串字段URL=”http://www.pecintakomik.com“+urlement.attr(“href”);
newChapter.setUrl(fieldUrl);
}
if(namelement!=null){
String fieldName=namelement.text();
newChapter.setName(字段名);
}
boolean fieldNew=chapterElement.html()包含(“”);
newChapter.setNew(fieldNew);
返回新章节;
}
请任何人知道我如何获得具有相同名称的第二类的列表?

此代码:

private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {
    int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
    int endIndex = unparsedHtml.indexOf("</div>", beginIndex);

    String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);
    ...
}
要保留这两个列表,可以执行以下操作:

int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
int secondListStart = unparsedHtml.indexOf("<div class=\"post-cnt\">",beginIndex + "<div class=\"post-cnt\">".length());
int endIndex = unparsedHtml.indexOf("</div>", secondListStart) + "</div>".length();

String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);
致:

此代码:

private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {
    int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
    int endIndex = unparsedHtml.indexOf("</div>", beginIndex);

    String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);
    ...
}
要保留这两个列表,可以执行以下操作:

int beginIndex = unparsedHtml.indexOf("<div class=\"post-cnt\">");
int secondListStart = unparsedHtml.indexOf("<div class=\"post-cnt\">",beginIndex + "<div class=\"post-cnt\">".length());
int endIndex = unparsedHtml.indexOf("</div>", secondListStart) + "</div>".length();

String trimmedHtml = unparsedHtml.substring(beginIndex, endIndex);
致:

试试这个

private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {

    Document parsedDocument = Jsoup.parse(unparsedHtml);

    List<Chapter> chapterList = new ArrayList<>();

    for (Element a : parsedDocument.select("div.post-cnt a")) {
        Chapter newChapter = DefaultFactory.Chapter.constructDefault();
        newChapter.setUrl("http://www.pecintakomik.com" + a.attr("href"));
        newChapter.setName(a.text());
        newChapter.setNew(!a.select("img[src=/images/new.gif]").isEmpty());
        chapterList.add(newChapter);
    }
    // .....
private List parsehtmlotchapters(RequestWrapper请求,字符串unparsedHtml){
Document parsedDocument=Jsoup.parse(unparsedHtml);
List chapterList=new ArrayList();
对于(元素a:parsedDocument.select(“div.post-cnt a”)){
Chapter newChapter=DefaultFactory.Chapter.constructDefault();
newChapter.setUrl(“http://www.pecintakomik.com“+a.attr(“href”);
newChapter.setName(a.text());
newChapter.setNew(!a.select(“img[src=/images/new.gif]”)。isEmpty();
章节列表。添加(新章节);
}
// .....
parsedDocument.select(“div.post-cnt a”)
选择所有
元素下的所有
元素。示例HTML中有四个这样的元素。

试试这个

private List<Chapter> parseHtmlToChapters(RequestWrapper request, String unparsedHtml) {

    Document parsedDocument = Jsoup.parse(unparsedHtml);

    List<Chapter> chapterList = new ArrayList<>();

    for (Element a : parsedDocument.select("div.post-cnt a")) {
        Chapter newChapter = DefaultFactory.Chapter.constructDefault();
        newChapter.setUrl("http://www.pecintakomik.com" + a.attr("href"));
        newChapter.setName(a.text());
        newChapter.setNew(!a.select("img[src=/images/new.gif]").isEmpty());
        chapterList.add(newChapter);
    }
    // .....
private List parsehtmlotchapters(RequestWrapper请求,字符串unparsedHtml){
Document parsedDocument=Jsoup.parse(unparsedHtml);
List chapterList=new ArrayList();
对于(元素a:parsedDocument.select(“div.post-cnt a”)){
Chapter newChapter=DefaultFactory.Chapter.constructDefault();
newChapter.setUrl(“http://www.pecintakomik.com“+a.attr(“href”);
newChapter.setName(a.text());
newChapter.setNew(!a.select(“img[src=/images/new.gif]”)。isEmpty();
章节列表。添加(新章节);
}
// .....

parsedDocument.select(“div.post-cnt a”)
选择所有
元素下的所有
元素。您的示例HTML中有四个这样的元素。

如何获得第二个?不是第一个。为什么解析整个页面更安全?@poundPound解析整个页面更安全,因为HTML语法没有那么严格,这意味着
indexOf(“”)如果HTML代码包含额外的空格或使用
而不是
作为引号或谁知道还有什么,
Jsoup
将失败。要仅获取第二个列表,可以使用:
String trimmedHtml=unparsedHtml.substring(secondListStart,endIndex)
。我怎么能得到第二个呢?不是第一个。为什么解析整个页面更安全?@poundPound解析整个页面更安全,因为HTML语法没有那么严格,这意味着如果HTML代码包含额外的空格或使用
而不是
,则
索引(“”
将失败
作为引号或谁知道还有什么,
Jsoup
处理所有这些。要仅获取第二个列表,可以使用:
String trimmedHtml=unparsedHtml.substring(secondListStart,endIndex);