Java HTML标记解析提取标题和其他_Java_Html_Parsing_Bufferedreader

Java HTML标记解析提取标题和其他

java html parsing

Java HTML标记解析提取标题和其他,java,html,parsing,bufferedreader,Java,Html,Parsing,Bufferedreader,试图从网站中提取标题，打印标题时出现流关闭错误。尝试在标题标记之间提取，例如。不熟悉，请详细解释。谢谢 import java.lang.*; import java.util.Scanner; import java.net.*; import java.io.*; public class Allrecipes{ public static void main(String[] args) throws Exception{ System.out.println(

我想从网站中提取标题，但在打印标题时出现了 "Stream closed"（流已关闭）错误。我想提取标题标记之间的内容，例如 `<title>` 和 `</title>` 之间的文本。我对 Java 还不熟悉，请详细解释。谢谢

import java.lang.*;
import java.util.Scanner;
import java.net.*;
import java.io.*;




/**
 * Original version from the question: reads a URL via compare(), opens the
 * page and tries to print the text between "<title>" and "</title>".
 *
 * NOTE(review): this snippet is incomplete as shown - compare() is called but
 * not defined here, and the closing brace of the class is missing.
 */
public class Allrecipes{
  public static void main(String[] args) throws Exception{  


    System.out.println("Colby Mehmen");

    // NOTE(review): this Scanner is never used inside main.
    Scanner input = new Scanner(System.in);
    String str1 = "";
    str1 = compare();

    // Only URLs containing the allrecipes.com prefix are fetched.
    if (str1.contains("http://allrecipes.com")){



        URL oracle = new URL(str1);
        BufferedReader in = new BufferedReader(
        new InputStreamReader(oracle.openStream()));

        String html;
        // BUG: the loop has no braces, so its body is the single statement
        // in.close() below. The reader is closed right after the first line
        // is read, and the next readLine() call then fails with
        // java.io.IOException: Stream closed.
        while ((html = in.readLine()) != null)  

            in.close();




     // NOTE(review): html only ever holds the most recently read line, never
     // the whole page; if the loop ended normally it would be null here.
     String page = html;

     // Positions of the opening and closing title tags (-1 when absent).
     int start = page.indexOf("<title>");
     int end = page.indexOf("</title>");

// Skip past the opening tag itself so only the text inside it is kept.
String title = page.substring(start+"<title>".length(),end);

System.out.println(title);


    }//end program





  }

试试 JSoup，它真的很容易使用：

// Fetch the page and parse it into a document; YOUR_WEBSITE is a placeholder
// for the URL string and requires the jsoup library on the classpath.
Document page = Jsoup.connect(YOUR_WEBSITE).get();
// Select the elements matching "title" and print their combined text.
Elements titleElements = page.select("title");
String titleText = titleElements.text();
System.out.println(titleText);

你的代码

如果您能把代码格式化一下就更好了。
@SotiriosDelimanolis 很抱歉，我匆忙整理了一下。
java.io.IOException: Stream closed ——这是我现在收到的错误。
请查看我更新后的答案；如果您能接受这个答案，我将不胜感激。
当我使用上面的代码时，它提示 cannot find symbol，symbol: class Document。
您需要先添加 JSoup 库。
好的，修复之后标题打印出来了，但仍然重复出现“0”。
我不了解您的代码；JSoup 会按原样返回标题，如果标题里有零，它就会返回零。我真的不明白您的问题。
您的 JSoup API 链接失效了。您可能是指……（原文此处的链接已缺失）

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

/**
 * Small command-line tool that asks for an allrecipes.com recipe URL,
 * downloads the page and prints the text of its {@code <title>} element.
 */
public class Allrecipes {

    private static final String SCHEME = "http://";
    private static final String SITE_HOST = "allrecipes.com";
    private static final String TITLE_OPEN = "<title>";
    private static final String TITLE_CLOSE = "</title>";

    /**
     * Program entry point: prompts for a URL, and when it points at
     * allrecipes.com, fetches the page and prints its title.
     *
     * @param args unused
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static void main(String[] args) throws Exception {
        System.out.println("Colby Mehmen");

        // Example input:
        // http://allrecipes.com/Recipe/Cardamom-Maple-Salmon/Detail.aspx?soid=carousel_0_rotd&prop24=rotd
        String recipeUrl = compare();

        if (!recipeUrl.contains(SCHEME + SITE_HOST)) {
            return;
        }

        String title = extractTitle(fetchPage(recipeUrl));
        if (title == null) {
            // Previously this case crashed with StringIndexOutOfBoundsException.
            System.err.println("No <title> element found in " + recipeUrl);
        } else {
            System.out.println(title);
        }
    }

    /**
     * Prompts on standard output for a recipe URL, reads one token from
     * standard input and prefixes it with {@code http://} when it mentions
     * allrecipes.com without that prefix. Prints {@code ERROR} when the result
     * is only the bare site address, then echoes and returns the URL.
     *
     * @return the (possibly prefixed) URL that was entered
     */
    public static String compare() {
        // Deliberately not closed: closing the Scanner would close System.in too.
        Scanner input = new Scanner(System.in);

        System.out.println("Enter recipe URL: ");
        String url = input.next();
        String baseUrl = SCHEME + SITE_HOST;

        if (url.contains(SITE_HOST) && !url.contains(baseUrl)) {
            url = SCHEME + url;
        }

        // Must use equals(): comparing with != checks object identity, which
        // is always "different" for a freshly concatenated string, so the
        // error branch could never run.
        if (url.equals(baseUrl)) {
            System.out.println("ERROR");
        }

        System.out.println(url);
        return url;
    }

    /**
     * Downloads the page at the given address and returns its lines joined
     * together without line separators.
     */
    private static String fetchPage(String address) throws IOException {
        // Start from an empty builder: starting from a null String and using
        // += would put the literal text "null" in front of the page.
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        // NOTE(review): assumes the page is served as UTF-8 - confirm.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                new URL(address).openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                html.append(line);
            }
        }
        return html.toString();
    }

    /**
     * Returns the text between the first {@code <title>} tag and the next
     * {@code </title>} tag, or {@code null} when either tag is missing.
     */
    private static String extractTitle(String html) {
        int open = html.indexOf(TITLE_OPEN);
        if (open < 0) {
            return null;
        }
        int contentStart = open + TITLE_OPEN.length();
        // Search for the closing tag only after the opening tag.
        int close = html.indexOf(TITLE_CLOSE, contentStart);
        if (close < 0) {
            return null;
        }
        return html.substring(contentStart, close);
    }
}