Java使用ApachePOI库将文档文件转换为pdf,其中包含所有图形、图像、表格、边框等
我正在使用 Apache POI 库,通过以下 Java 代码将 Word 文档(.doc)文件转换为 PDF(标签:java, apache-poi):
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import com.lowagie.text.Document;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfWriter;
/**
 * Command-line demo that copies the plain text of a legacy Word {@code .doc}
 * file into a PDF, adding one PDF paragraph per extracted Word paragraph.
 *
 * <p>Only the strings returned by {@code WordExtractor.getParagraphText()} are
 * written. This class never reads tables, images or character formatting from
 * the source document, so none of those appear in the output.
 */
public class TestDoc {
    /**
     * Reads {@code D:/vijay/doctopdf/test.doc} and writes its paragraph text to
     * {@code D:/vijay/doctopdf/test.pdf}.
     *
     * <p>Failures are reported on the console (a message plus the stack trace)
     * and are not propagated to the caller.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("Starting the test");
        // try-with-resources guarantees both file streams are released on every
        // path, including when parsing or PDF generation fails part-way through.
        try (FileInputStream input = new FileInputStream("D:/vijay/doctopdf/test.doc")) {
            HWPFDocument doc = new HWPFDocument(new POIFSFileSystem(input));
            WordExtractor we = new WordExtractor(doc);
            String[] paragraphs = we.getParagraphText();

            // The output file is only created once the input has been parsed.
            try (OutputStream file = new FileOutputStream(new File("D:/vijay/doctopdf/test.pdf"))) {
                Document document = new Document();
                PdfWriter writer = PdfWriter.getInstance(document, file);
                document.open();
                try {
                    // NOTE(review): the reason for this setPageEmpty/newPage sequence
                    // is not evident from this file; it is kept exactly as it was.
                    writer.setPageEmpty(true);
                    document.newPage();
                    writer.setPageEmpty(true);

                    for (int i = 0; i < paragraphs.length; i++) {
                        // Strip line terminators (\cM is the carriage-return control char).
                        paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                        System.out.println("Length:" + paragraphs[i].length());
                        System.out.println("Paragraph" + i + ": " + paragraphs[i]);
                        // add the paragraph to the document
                        document.add(new Paragraph(paragraphs[i]));
                    }
                } finally {
                    // Close only a document that was actually opened, and do it
                    // while the underlying output stream is still open.
                    document.close();
                }
            }
            System.out.println("Document testing completed");
        } catch (Exception e) {
            System.out.println("Exception during test");
            e.printStackTrace();
        }
    }
}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import com.lowagie.text.Document;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfWriter;
public class TestDoc {
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
POIFSFileSystem fs = null;
Document document = new Document();
try {
System.out.println("Starting the test");
//D:\vijay\doctopdf
fs = new POIFSFileSystem(new FileInputStream("D:/vijay/doctopdf/test.doc"));
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
OutputStream file = new FileOutputStream(new File("D:/vijay/doctopdf/test.pdf"));
PdfWriter writer = PdfWriter.getInstance(document, file);
Range range = doc.getRange();
document.open();
writer.setPageEmpty(true);
document.newPage();
writer.setPageEmpty(true);
String[] paragraphs = we.getParagraphText();
for (int i = 0; i < paragraphs.length; i++) {
以上代码可以成功运行,但只会把文本转换到 PDF 中。当文档包含表格或图像等内容时,这些内容不会出现在生成的 PDF 里。
有人知道如何在完整保留内容和格式的前提下,将文档转换为 PDF 吗?您可以使用 Apache Tika 解析器中的 WordExtractor。