用JSoup合并Java中的HTML文件
标签:java, html, for-loop, jsoup, bufferedwriter
摘要:我正在尝试使用Jsoup将几个.html文件合并成一个.html文件。我的想法是获取目录(dir)中的.html文件列表,并将文件名存储在ArrayList中。然后遍历该ArrayList,将每个文件名作为字符串传递给Jsoup.parse()方法。我能够毫无问题地填充ArrayList,并且代码在一次只处理一个文件时可以正常工作;但是当我加入下面的for循环后,虽然创建了NEW_INFORMATION.html文件,却没有写入任何内容。你知道我遗漏了什么吗?
我正在尝试使用Jsoup将几个.html文件合并成一个.html文件。我的想法是获取目录(dir)中的.html文件列表,并将文件名存储在ArrayList中。然后遍历该ArrayList,将每个文件名作为字符串传递给Jsoup.parse()方法。
我能够毫无问题地填充ArrayList,并且代码在一次只处理一个文件时可以正常工作;但是当我加入下面的for循环后,虽然创建了NEW_INFORMATION.html文件,却没有写入任何内容。你知道我遗漏了什么吗?
以下是当前代码:
/**
 * Merges the {@code <table>} elements of every .html file found in a fixed
 * directory into a single report file, NEW_INFORMATION.html.
 */
public class mergeFiles {

    /**
     * Lists the .html files in the source directory, parses each one with
     * Jsoup and writes a level-2 heading (the file path) followed by that
     * file's tables into the output file.
     *
     * @param args unused
     * @throws IOException if the directory cannot be listed, or a file cannot
     *                     be read or written
     */
    public static void main(String[] args) throws IOException {
        File outputFile = new File("C:\\Users\\1234\\Desktop\\PowerShellOutput\\NEW_INFORMATION.html");
        File dir = new File("C:\\Users\\1234\\Desktop\\PowerShellOutput\\");

        // Only capture files with extension .html (a bare ".html" name is rejected).
        FilenameFilter fileNameFilter = new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                int lastIndex = name.lastIndexOf('.');
                return lastIndex > 0 && name.substring(lastIndex).equals(".html");
            }
        };

        // listFiles returns null when dir is not a directory or cannot be read.
        File[] paths = dir.listFiles(fileNameFilter);
        if (paths == null) {
            throw new IOException("Cannot list directory: " + dir);
        }

        List<String> list = new ArrayList<String>();
        for (File x : paths) {
            // The output file lives in the same directory and also ends in .html;
            // skip it so a re-run does not merge the (just truncated) report into itself.
            if (!x.equals(outputFile)) {
                list.add(x.toString());
            }
        }
        System.out.print(list);

        // Open the writer ONCE, outside the loop. Creating a new FileOutputStream per
        // iteration truncated the file every time, and because the writer was never
        // closed or flushed the buffered content never reached the disk.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"))) {
            for (String s : list) {
                Document doc = Jsoup.parse(new File(s), "UTF-8");
                Elements links = doc.select("table");
                // Heading is now properly closed with </h2>.
                bw.append("<h2>" + s + "</h2>");
                bw.append(links.toString());
            }
        }
    }
}
public class mergeFiles {
public static void main(String[] args) throws IOException {
File outputFile = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\NEW_INFORMATION.html");
File dir = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\");
File [] paths;
//Only capture files with extension .html
FilenameFilter fileNameFilter = new FilenameFilter(){
public boolean accept(File dir, String name) {
// TODO Auto-generated method stub
if (name.lastIndexOf('.') > 0) {
int lastIndex = name.lastIndexOf('.');
String extension = name.substring(lastIndex);
if(extension.equals(".html")){
return true;
}
}
return false;
}
};
paths = dir.listFiles(fileNameFilter);
List<String> list = new ArrayList<String>();
for (File x : paths){
list.add(x.toString());
}
System.out.print(list);
for (String s : list){
File input = new File(s);
Document doc = Jsoup.parse(input, "UTF-8");
Elements links = doc.select("table");
@SuppressWarnings("resource")
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
bw.append("<h2>" + s.toString() + "<h2>");
bw.append(links.toString());
}
}
}
我还尝试了下面这个变体,即不把路径转换为字符串,而是直接使用File对象(结果相同):
for (File x : paths){
Document doc = Jsoup.parse(x, "UTF-8");
Elements links = doc.select("table");
@SuppressWarnings("resource")
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
bw.append("<h2>" + x.toString() + "<h2>");
bw.append(links.toString());
}
以下是完整的答案,供将来可能需要的人参考:
package htmlMerge;
import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;
/**
 * Merges the {@code <table>} elements of every .html file in a directory into
 * a single report file that starts with a "REPORT DATA" heading.
 */
public class mergeFiles {

    /**
     * Writes the report header, then for each .html file in the source
     * directory appends a level-2 heading (the file path) followed by the
     * tables Jsoup selects from that file.
     *
     * <p>I/O failures are reported on standard error; the success message is
     * printed only when the whole merge completed.
     *
     * @param args unused
     * @throws IOException declared for compatibility; I/O errors raised while
     *                     merging are caught and reported instead of propagated
     */
    public static void main(String[] args) throws IOException {
        String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
        File outputFile = new File(outFileName);
        String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
        File dir = new File(desktopDir);

        // File filter that only accepts .html files, in case the folder
        // contains other extensions (a bare ".html" name is rejected).
        FilenameFilter fileNameFilter = new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                int lastIndex = name.lastIndexOf('.');
                return lastIndex > 0 && name.substring(lastIndex).equals(".html");
            }
        };

        // listFiles returns null when dir is not a directory or cannot be read.
        File[] paths = dir.listFiles(fileNameFilter);
        if (paths == null) {
            System.err.println("Cannot list directory: " + dir);
            return;
        }

        // One UTF-8 writer for header and body: the previous FileWriter used the
        // platform default charset, which could mix encodings within one file.
        // try-with-resources closes the writer exactly once, even on failure.
        try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(outputFile), "UTF-8"))) {
            bw.write("<h1>REPORT DATA</h1>");
            for (File x : paths) {
                // Never merge the report into itself if it lives in the source directory.
                if (x.equals(outputFile)) {
                    continue;
                }
                Document doc = Jsoup.parse(x, "UTF-8");
                Elements links = doc.select("table");
                // Adds the path of the .html file as a level-2 heading.
                bw.write("<h2>" + x.toString() + "</h2>");
                bw.write(links.toString());
            }
        } catch (IOException ioe) {
            System.err.println(ioe.getMessage());
            return; // do not claim success after a failure
        }
        System.out.println("\nMerge Completed Successfully");
    }
}
package htmlMerge;
import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;
public class mergeFiles {
public static void main(String[] args) throws IOException {
try {
String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
File outputFile = new File(outFileName);
String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
File dir = new File(desktopDir);
File[] paths;
//create a file filter that will only worry about .html files if your folder contains other extensions
FilenameFilter fileNameFilter = new FilenameFilter() {
public boolean accept(File dir, String name) {
if (name.lastIndexOf('.') > 0) {
int lastIndex = name.lastIndexOf('.');
String extension = name.substring(lastIndex);
if (extension.equals(".html")) {
return true;
}
}
return false;
}
};
paths = dir.listFiles(fileNameFilter);
//use BufferedWriterd to create the initial .html file with a header
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(outputFile), "UTF-8"));
bw.write("<h1>REPORT DATA</h1>");
bw.close();
/*Use file writer to append the .html file with additional .html files
In this case, the .html files all contain One 'table', so this
will append the tables to 'outputFile'.*/
try {
String file = outputFile.getAbsolutePath();
FileWriter fw = new FileWriter(file, true);
for (File x : paths) {
Document doc = Jsoup.parse(x, "UTF-8");
Elements links = doc.select("table");
//adds the filename of the .html as a Level 2 heading
fw.write("<h2>" + x.toString() + "</h2>");
fw.write(links.toString());
}
fw.close();
}catch (IOException ioe) {
System.err.println(ioe.getMessage());
} finally {
bw.close();
}
} catch (IOException ioe) {
System.out.println(ioe.getMessage());
}
System.out.println("\nMerge Completed Successfully");
}
}
评论:
- 您必须关闭BufferedWriter才能看到写入的内容。您忘记了调用bw.close()。
- 您是对的。我原以为可以先暂时抑制这个警告,等try/catch写好之后再关闭它。如果你把它作为答案发布,我会采纳它。谢谢你的时间,@alkis。
- 没问题。谢谢你的反馈。祝你玩得开心!
package htmlMerge;
import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;
/**
 * Merges the {@code <table>} elements of every .html file in a directory into
 * a single report file that starts with a "REPORT DATA" heading.
 */
public class mergeFiles {

    /**
     * Writes the report header, then for each .html file in the source
     * directory appends a level-2 heading (the file path) followed by the
     * tables Jsoup selects from that file.
     *
     * <p>I/O failures are reported on standard error; the success message is
     * printed only when the whole merge completed.
     *
     * @param args unused
     * @throws IOException declared for compatibility; I/O errors raised while
     *                     merging are caught and reported instead of propagated
     */
    public static void main(String[] args) throws IOException {
        String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
        File outputFile = new File(outFileName);
        String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
        File dir = new File(desktopDir);

        // File filter that only accepts .html files, in case the folder
        // contains other extensions (a bare ".html" name is rejected).
        FilenameFilter fileNameFilter = new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                int lastIndex = name.lastIndexOf('.');
                return lastIndex > 0 && name.substring(lastIndex).equals(".html");
            }
        };

        // listFiles returns null when dir is not a directory or cannot be read.
        File[] paths = dir.listFiles(fileNameFilter);
        if (paths == null) {
            System.err.println("Cannot list directory: " + dir);
            return;
        }

        // One UTF-8 writer for header and body: the previous FileWriter used the
        // platform default charset, which could mix encodings within one file.
        // try-with-resources closes the writer exactly once, even on failure.
        try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(outputFile), "UTF-8"))) {
            bw.write("<h1>REPORT DATA</h1>");
            for (File x : paths) {
                // Never merge the report into itself if it lives in the source directory.
                if (x.equals(outputFile)) {
                    continue;
                }
                Document doc = Jsoup.parse(x, "UTF-8");
                Elements links = doc.select("table");
                // Adds the path of the .html file as a level-2 heading.
                bw.write("<h2>" + x.toString() + "</h2>");
                bw.write(links.toString());
            }
        } catch (IOException ioe) {
            System.err.println(ioe.getMessage());
            return; // do not claim success after a failure
        }
        System.out.println("\nMerge Completed Successfully");
    }
}