用JSoup合并Java中的HTML文件_Java_Html_For Loop_Jsoup_Bufferedwriter

用JSoup合并Java中的HTML文件

java html for-loop

用JSoup合并Java中的HTML文件,java,html,for-loop,jsoup,bufferedwriter,Java,Html,For Loop,Jsoup,Bufferedwriter,我正在尝试使用Jsoup将几个.html文件合并成一个.html文件。我的想法是在dir中获取.html文件列表，并将名称存储在数组列表中。然后，我将通过ArrayList循环，将每个文件名作为字符串传递给Jsoup.parse（）方法我能够毫无问题地填充ArrayList，并且我的代码一次只能处理一个文件，但是当我添加到下面的for loops时，创建了NEW_INFORMATION.html文件，但没有填充任何内容。你知道我遗漏了什么吗以下是当前代码： public class merg

我正在尝试使用Jsoup将几个

.html

文件合并成一个

.html

文件。我的想法是在

dir

中获取

.html

文件列表，并将名称存储在

ArrayList 中。然后，我遍历该 ArrayList，将每个文件名作为字符串传递给 Jsoup.parse() 方法。
我能够毫无问题地填充 ArrayList，并且我的代码在一次只处理一个文件时可以正常工作；但是当我加入下面的 for 循环后，虽然创建了 NEW_INFORMATION.html 文件，里面却没有写入任何内容。你知道我遗漏了什么吗？
以下是当前代码：
// Question's original attempt: lists the .html files in a hard-coded directory and tries to
// write a heading plus the <table> elements of each one into NEW_INFORMATION.html.
// NOTE(review): this is the version that produces an empty output file; the review notes in
// the second loop of main() mark the lines responsible.
public class mergeFiles {

    // Entry point; args is not used. All paths are hard-coded below.
    public static void main(String[] args) throws IOException {

        // NOTE(review): outputFile is located inside dir and ends in .html, so once it exists
        // it also matches the filter below and is fed back in as an input.
        File outputFile = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\NEW_INFORMATION.html");
        File dir = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\");
        File [] paths;
        //Only capture files with extension .html
        FilenameFilter fileNameFilter = new FilenameFilter(){
            public boolean accept(File dir, String name) {
                // Accept only names whose last '.' is not the first character and whose
                // suffix from that '.' onward is exactly ".html" (case-sensitive).
                if (name.lastIndexOf('.') > 0) {
                    int lastIndex = name.lastIndexOf('.');
                    String extension = name.substring(lastIndex);
                    if(extension.equals(".html")){
                        return true;
                    }
                }
                return false;
            }
        };      
        // NOTE(review): listFiles returns null when dir is not a readable directory; the
        // for-each below would then throw a NullPointerException.
        paths = dir.listFiles(fileNameFilter);
        // Copy each matching File into a list of path strings.
        List<String> list = new ArrayList<String>();
        for (File x : paths){
            list.add(x.toString());
        }
        System.out.print(list);
        // For every collected path: parse the file as UTF-8 HTML and select its <table> elements.
        for (String s : list){
            File input = new File(s);
            Document doc = Jsoup.parse(input, "UTF-8"); 
            Elements links = doc.select("table");
            // NOTE(review): a new FileOutputStream on outputFile is opened on every iteration
            // without the append flag, so each pass truncates whatever earlier passes wrote.
            @SuppressWarnings("resource")
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new       FileOutputStream(outputFile), "UTF-8"));
            // NOTE(review): the trailing tag is a second opening <h2>, not a closing </h2>.
            bw.append("<h2>" + s.toString() + "<h2>");
            bw.append(links.toString());
            // NOTE(review): bw is never flushed or closed, so text still sitting in its buffer
            // is not written to the file; the suppressed "resource" warning was about this.
        }
    }
}

公共类合并文件{
公共静态void main（字符串[]args）引发IOException{
File outputFile=新文件（“C:\\Users\\1234\\Desktop\\PowerShellOutput\\new_INFORMATION.html”）；
文件目录=新文件（“C:\\Users\\1234\\Desktop\\PowerShellOutput\\”；
文件[]路径；
//仅捕获扩展名为.html的文件
FilenameFilter FilenameFilter=新FilenameFilter（）{
公共布尔接受（文件目录，字符串名称）{
//TODO自动生成的方法存根
如果（name.lastIndexOf（'.'）>0）{
int lastIndex=name.lastIndexOf（'.'）；
字符串扩展名=name.substring（lastIndex）；
if（扩展名为.equals（“.html”））{
返回true；
}
}
返回false；
}
};      
path=dir.listFiles（fileNameFilter）；
列表=新的ArrayList（）；
用于（文件x:路径）{
list.add（x.toString（））；
}
系统输出打印（列表）；
用于（字符串s:列表）{
文件输入=新文件；
文档doc=Jsoup.parse（输入“UTF-8”）；
元素链接=文件选择（“表格”）；
@抑制警告（“资源”）
BufferedWriter bw=新的BufferedWriter（新的OutputStreamWriter（新的文件OutputStream（outputFile），“UTF-8”）；
追加（“+s.toString（）+”）；
append（links.toString（））；
}
}
}

我还尝试了这个变体，但没有将路径转换为字符串（结果相同）：
for (File x : paths) {
    Document doc = Jsoup.parse(x, "UTF-8");
    Elements links = doc.select("table");
    @SuppressWarnings("resource")
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
    bw.append("<h2>" + x.toString() + "<h2>");
    bw.append(links.toString());
}

为将来可能需要以下内容的任何人提供完整答案：
package htmlMerge;

import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

/**
 * Merges the {@code <table>} elements of every {@code .html} file found in one directory
 * into a single report file.
 *
 * <p>The report starts with an {@code <h1>} header; each input file then contributes an
 * {@code <h2>} heading holding its path, followed by the markup of all tables selected from it.
 * The whole report is written through one UTF-8 writer that is closed exactly once, so the
 * header and the appended tables always share the same encoding and the file handle is
 * released even when parsing an input fails.
 */
public class mergeFiles {

    /** Suffix (including the dot) a file name must end with to be merged; case-sensitive. */
    private static final String HTML_EXTENSION = ".html";

    /** Charset name used both to parse the inputs and to write the merged report. */
    private static final String ENCODING = "UTF-8";

    /**
     * Runs the merge using the paths hard-coded below.
     *
     * @param args unused
     * @throws IOException kept for signature compatibility; I/O failures are reported on
     *     {@code System.err} rather than propagated
     */
    public static void main(String[] args) throws IOException {
        String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
        File outputFile = new File(outFileName);
        String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
        File dir = new File(desktopDir);

        // Only consider .html files, in case the folder contains other extensions.
        // "lastIndex > 0" rejects names with no dot and names that start with their only dot.
        FilenameFilter fileNameFilter = new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                int lastIndex = name.lastIndexOf('.');
                return lastIndex > 0 && name.substring(lastIndex).equals(HTML_EXTENSION);
            }
        };

        // listFiles returns null (not an empty array) when dir is missing or unreadable.
        File[] paths = dir.listFiles(fileNameFilter);
        if (paths == null) {
            System.err.println("Cannot list directory: " + dir);
            return;
        }

        // One writer for the entire report; try-with-resources flushes and closes it on
        // both the normal and the exceptional path.
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputFile), ENCODING))) {
            File canonicalOutput = outputFile.getCanonicalFile();
            out.write("<h1>REPORT DATA</h1>");
            for (File x : paths) {
                // A report left over from a previous run may sit in the scanned directory;
                // never merge the output into itself.
                if (x.getCanonicalFile().equals(canonicalOutput)) {
                    continue;
                }
                Document doc = Jsoup.parse(x, ENCODING);
                Elements tables = doc.select("table");
                // The input file's path becomes a level-2 heading above its tables.
                out.write("<h2>" + x.toString() + "</h2>");
                out.write(tables.toString());
            }
        } catch (IOException ioe) {
            System.err.println("Merge failed: " + ioe.getMessage());
            return;
        }
        // Reached only when every file was written without an I/O error.
        System.out.println("\nMerge Completed Successfully");
    }
}

package-htmlMerge；
导入java.io.*；
导入org.jsoup.*；
导入org.jsoup.nodes.*；
导入org.jsoup.select.Elements；
公共类合并文件{
公共静态void main（字符串[]args）引发IOException{
试一试{
字符串outFileName=System.getProperty（“user.home”）+“/Desktop/”；
File outputFile=新文件（outFileName）；
字符串desktopDir=System.getProperty（“user.home”）+“/Desktop/”；
文件目录=新文件（desktopDir）；
文件[]路径；
//创建一个文件过滤器，如果文件夹包含其他扩展名，则只考虑.html文件
FilenameFilter FilenameFilter=新FilenameFilter（）{
公共布尔接受（文件目录，字符串名称）{
如果（name.lastIndexOf（'.'）>0）{
int lastIndex=name.lastIndexOf（'.'）；
字符串扩展名=name.substring（lastIndex）；
if（扩展名为.equals（“.html”））{
返回true；
}
}
返回false；
}
};
path=dir.listFiles（fileNameFilter）；
//使用BufferedWriterd创建带有头的初始.html文件
BufferedWriter bw=新的BufferedWriter（新的OutputStreamWriter(
新文件outputstream（outputFile），“UTF-8”）；
bw.写入（“报告数据”）；
bw.close（）；
/*使用文件编写器将.html文件附加到其他.html文件中
在本例中，.html文件都包含一个“表”，因此
将表附加到“outputFile”中*/
试一试{
String file=outputFile.getAbsolutePath（）；
FileWriter fw=新的FileWriter（文件，true）；
用于（文件x:路径）{
文档doc=Jsoup.parse（x，“UTF-8”）；
元素链接=文件选择（“表格”）；
//将.html的文件名添加为2级标题
fw.write（“+x.toString（）+”）；
write（links.toString（））；
}
fw.close（）；
}捕获（ioe异常ioe）{
System.err.println（ioe.getMessage（））；
}最后{
bw.close（）；
}
}捕获（ioe异常ioe）{
System.out.println（ioe.getMessage（））；
}
System.out.println（“\n合并成功完成”）；
}
}
您必须关闭 BufferedWriter 才能看到更改——您忘记了调用 bw.close()。
——您说得对。我原本以为可以暂时抑制该警告，等 try/catch 正常工作后再关闭它；如果您把它作为答案发布，我会将其标记为已采纳。感谢您抽出时间！
——@alkis 没问题，谢谢你的意见。祝你玩得开心！
package htmlMerge;

import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

/**
 * Merges the {@code <table>} elements of every {@code .html} file found in one directory
 * into a single report file.
 *
 * <p>The report starts with an {@code <h1>} header; each input file then contributes an
 * {@code <h2>} heading holding its path, followed by the markup of all tables selected from it.
 * The whole report is written through one UTF-8 writer that is closed exactly once, so the
 * header and the appended tables always share the same encoding and the file handle is
 * released even when parsing an input fails.
 */
public class mergeFiles {

    /** Suffix (including the dot) a file name must end with to be merged; case-sensitive. */
    private static final String HTML_EXTENSION = ".html";

    /** Charset name used both to parse the inputs and to write the merged report. */
    private static final String ENCODING = "UTF-8";

    /**
     * Runs the merge using the paths hard-coded below.
     *
     * @param args unused
     * @throws IOException kept for signature compatibility; I/O failures are reported on
     *     {@code System.err} rather than propagated
     */
    public static void main(String[] args) throws IOException {
        String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
        File outputFile = new File(outFileName);
        String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
        File dir = new File(desktopDir);

        // Only consider .html files, in case the folder contains other extensions.
        // "lastIndex > 0" rejects names with no dot and names that start with their only dot.
        FilenameFilter fileNameFilter = new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                int lastIndex = name.lastIndexOf('.');
                return lastIndex > 0 && name.substring(lastIndex).equals(HTML_EXTENSION);
            }
        };

        // listFiles returns null (not an empty array) when dir is missing or unreadable.
        File[] paths = dir.listFiles(fileNameFilter);
        if (paths == null) {
            System.err.println("Cannot list directory: " + dir);
            return;
        }

        // One writer for the entire report; try-with-resources flushes and closes it on
        // both the normal and the exceptional path.
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputFile), ENCODING))) {
            File canonicalOutput = outputFile.getCanonicalFile();
            out.write("<h1>REPORT DATA</h1>");
            for (File x : paths) {
                // A report left over from a previous run may sit in the scanned directory;
                // never merge the output into itself.
                if (x.getCanonicalFile().equals(canonicalOutput)) {
                    continue;
                }
                Document doc = Jsoup.parse(x, ENCODING);
                Elements tables = doc.select("table");
                // The input file's path becomes a level-2 heading above its tables.
                out.write("<h2>" + x.toString() + "</h2>");
                out.write(tables.toString());
            }
        } catch (IOException ioe) {
            System.err.println("Merge failed: " + ioe.getMessage());
            return;
        }
        // Reached only when every file was written without an I/O error.
        System.out.println("\nMerge Completed Successfully");
    }
}