Java:如何使用 Apache POI 将 MS Word 文件转换为 PDF?

Java:使用ApachePOI如何将ms word文件转换为pdf?,java,itext,apache-poi,Java,Itext,Apache Poi,通过使用ApachePOI如何将ms word文件转换为pdf 我正在使用下面的代码,但它不工作给出错误我猜我导入了错误的类 import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.OutputStream; import org.apache.poi.hslf.record.Document; import org.apache.poi.hwpf.

如何使用 Apache POI 将 MS Word 文件转换为 PDF?

我正在使用下面的代码,但它无法正常工作并且会报错。我猜是我导入了错误的类。

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.poi.hslf.record.Document;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


/**
 * Question listing: reads {@code /document/test2.doc} with Apache POI's HWPF
 * classes and tries to write the text of every paragraph to
 * {@code /document/test.pdf} through a {@code PdfWriter}.
 *
 * <p>NOTE: with the imports shown above this class, {@code Document} resolves to
 * {@code org.apache.poi.hslf.record.Document} and {@code Paragraph} to
 * {@code org.apache.poi.hwpf.usermodel.Paragraph}, while {@code PdfWriter} has
 * no import at all. The author reports that the code fails and suspects the
 * imports; the listing is kept as posted.
 */
public class TestCon {

    /**
     * Runs the conversion attempt with hard-coded input and output paths.
     *
     * @param args command-line arguments (not read)
     */
    public static void main(String[] args) {
        // Declared outside the try block so the finally clause can reach the document.

        POIFSFileSystem fs = null;  
         Document document = new Document(); 

         try {  
             System.out.println("Starting the test");  
             fs = new POIFSFileSystem(new FileInputStream("/document/test2.doc"));  

             // HWPFDocument parses the .doc; WordExtractor exposes its text.
             HWPFDocument doc = new HWPFDocument(fs);  
             WordExtractor we = new WordExtractor(doc);  

             OutputStream file = new FileOutputStream(new File("/document/test.pdf")); 

             // Bind the PDF writer to the output stream (PdfWriter is not imported in this listing).
             PdfWriter writer = PdfWriter.getInstance(document, file);  

             Range range = doc.getRange();
             document.open();  
             writer.setPageEmpty(true);  
             document.newPage();  
             writer.setPageEmpty(true);  

             String[] paragraphs = we.getParagraphText();  
             for (int i = 0; i < paragraphs.length; i++) {  

                 // pr is only referenced by the commented-out formatting experiment below.
                 org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
                // CharacterRun run = pr.getCharacterRun(i);
                // run.setBold(true);
                // run.setCapitalized(true);
                // run.setItalic(true);
                 // Remove line terminators: optional Ctrl-M (\cM), optional CR, then LF.
                 paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");  
             System.out.println("Length:" + paragraphs[i].length());  
             System.out.println("Paragraph" + i + ": " + paragraphs[i].toString());  

             // Add the cleaned paragraph text to the output document.
             document.add(new Paragraph(paragraphs[i]));  
             }  

             System.out.println("Document testing completed");  
         } catch (Exception e) {  
             // Any failure is reported on the console; nothing is rethrown.
             System.out.println("Exception during test");  
             e.printStackTrace();  
         } finally {  
                         // Close the document whether or not the conversion succeeded.
            document.close();  
                     }  
         }  
    }
导入java.io.File;
导入java.io.FileInputStream;
导入java.io.FileOutputStream;
导入java.io.OutputStream;
导入org.apache.poi.hslf.record.Document;
导入org.apache.poi.hwpf.hwpf文档;
导入org.apache.poi.hwpf.extractor.WordExtractor;
导入org.apache.poi.hwpf.usermodel.paragration;
导入org.apache.poi.hwpf.usermodel.Range;
导入org.apache.poi.poifs.filesystem.poifsfsystem;
公共类测试控制{
/**
*@param args
*/
公共静态void main(字符串[]args){
//TODO自动生成的方法存根
POIFSFS=null;
文档=新文档();
试试{
System.out.println(“开始测试”);
fs=新的POIFSFISTEM(新的FileInputStream(“/document/test2.doc”);
HWPF文件文件=新的HWPF文件(fs);
WordExtractor we=新的WordExtractor(文档);
OutputStream文件=新文件OutputStream(新文件(“/document/test.pdf”);
PdfWriter writer=PdfWriter.getInstance(文档、文件);
Range Range=doc.getRange();
document.open();
writer.setPageEmpty(true);
document.newPage();
writer.setPageEmpty(true);
String[]段落=we.getParagraphText();
对于(inti=0;i
这里有几个步骤:

  • 使用 POI 将 Word 文档读入一种与格式无关的中间表示
  • 将这种与格式无关的中间表示转换为 PDF
  • 写出 PDF 文件

我不确定 POI 能否帮你完成第二步。这一步我建议使用其他库,比如 iText。

    问题已解决,下面是可以正常工作的代码:

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.OutputStream;
    
    import com.lowagie.text.Document;
    import com.lowagie.text.DocumentException;
    import com.lowagie.text.Paragraph;
    import com.lowagie.text.pdf.PdfWriter;
    
    
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.extractor.WordExtractor;
    
    import org.apache.poi.hwpf.usermodel.Range;
    import org.apache.poi.poifs.filesystem.POIFSFileSystem;
    
    
    /**
     * Converts the text of a legacy Word ({@code .doc}) file into a PDF.
     *
     * <p>Apache POI (HWPF) is used only to extract the paragraph text; iText
     * ({@code com.lowagie.text}) builds the PDF. Only the plain paragraph text is
     * carried over, because nothing but the extracted strings is handed to iText.
     */
    public class TestCon {

        /**
         * Reads {@code D:/Resume.doc} and writes its paragraphs to {@code D:/test.pdf}.
         * Any failure is reported on the console and is not rethrown.
         *
         * @param args command-line arguments (not read)
         */
        public static void main(String[] args) {
            System.out.println("Starting the test");

            // try-with-resources guarantees both file handles are released, even on failure.
            try (FileInputStream wordIn = new FileInputStream("D:/Resume.doc")) {
                POIFSFileSystem fs = new POIFSFileSystem(wordIn);
                HWPFDocument doc = new HWPFDocument(fs);
                WordExtractor we = new WordExtractor(doc);
                String[] paragraphs = we.getParagraphText();

                // Open the target only after the Word file was read successfully, so an
                // unreadable input does not leave an empty PDF file behind.
                try (OutputStream pdfOut = new FileOutputStream(new File("D:/test.pdf"))) {
                    writeParagraphs(paragraphs, pdfOut);
                }

                System.out.println("Document testing completed");
            } catch (Exception e) {
                System.out.println("Exception during test");
                e.printStackTrace();
            }
        }

        /**
         * Writes every paragraph, with its line terminators removed, into a new PDF.
         *
         * @param paragraphs paragraph texts as returned by the Word extractor
         * @param out        stream that receives the PDF bytes; the caller owns and closes it
         * @throws DocumentException if iText rejects the writer setup or a paragraph
         */
        private static void writeParagraphs(String[] paragraphs, OutputStream out)
                throws DocumentException {
            Document document = new Document();
            PdfWriter writer = PdfWriter.getInstance(document, out);
            document.open();
            try {
                // Page-state calls kept exactly as in the original listing.
                writer.setPageEmpty(true);
                document.newPage();
                writer.setPageEmpty(true);

                for (int i = 0; i < paragraphs.length; i++) {
                    // Remove line terminators: optional Ctrl-M (\cM), optional CR, then LF.
                    String text = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                    System.out.println("Length:" + text.length());
                    System.out.println("Paragraph" + i + ": " + text);

                    document.add(new Paragraph(text));
                }
            } finally {
                // Must run before the caller closes the stream: closing the document
                // is what flushes the remaining PDF content to it.
                document.close();
            }
        }
    }
    
    导入java.io.File;
    导入java.io.FileInputStream;
    导入java.io.FileOutputStream;
    导入java.io.OutputStream;
    导入com.lowagie.text.Document;
    导入com.lowagie.text.DocumentException;
    导入com.lowagie.text.paragration;
    导入com.lowagie.text.pdf.PdfWriter;
    导入org.apache.poi.hwpf.hwpf文档;
    导入org.apache.poi.hwpf.extractor.WordExtractor;
    导入org.apache.poi.hwpf.usermodel.Range;
    导入org.apache.poi.poifs.filesystem.poifsfsystem;
    公共类测试控制{
    /**
    *@param args
    */
    公共静态void main(字符串[]args){
    //TODO自动生成的方法存根
    POIFSFS=null;
    文档=新文档();
    试试{
    System.out.println(“开始测试”);
    fs=新的POIFSF文件系统(新文件输入流(“D:/Resume.doc”);
    HWPF文件文件=新的HWPF文件(fs);
    WordExtractor we=新的WordExtractor(文档);
    OutputStream文件=新文件OutputStream(新文件(“D:/test.pdf”);
    PdfWriter writer=PdfWriter.getInstance(文档、文件);
    Range Range=doc.getRange();
    document.open();
    writer.setPageEmpty(true);
    document.newPage();
    writer.setPageEmpty(true);
    String[]段落=we.getParagraphText();
    对于(inti=0;i
    顺便一提,也可以直接从 Word/Excel 的内容流中动态读取内容,而不必先从文件系统读取或将其序列化到磁盘,例如从 CMIS 存储库检索内容时:

    例如:

    (doc 的类型为 org.apache.chemistry.opencmis.client.api.Document;在本例中,我修改了您的代码,通过 OpenCMIS 从 Alfresco 存储库检索 Word 文件,并将其转换为 PDF)


    希望对你有所帮助。

    以下代码对我有效:

    Public class DocToPdfConverter{
    
    public static void main(String[] args) {
    
            String k=null;
            OutputStream fileForPdf =null;
            try {
    
                String fileName="/document/test2.doc";
                //Below Code is for .doc file 
                if(fileName.endsWith(".doc"))
                {
                HWPFDocument doc = new HWPFDocument(new FileInputStream(
                        fileName));
                WordExtractor we=new WordExtractor(doc);
                k = we.getText();
    
                 fileForPdf = new FileOutputStream(new File(
                            "/document/DocToPdf.pdf")); 
                we.close();
                }
    
                //Below Code for 
    
                else if(fileName.endsWith(".docx"))
                {
                    XWPFDocument docx = new XWPFDocument(new FileInputStream(
                            fileName));
                    // using XWPFWordExtractor Class
                    XWPFWordExtractor we = new XWPFWordExtractor(docx);
                     k = we.getText();
    
                     fileForPdf = new FileOutputStream(new File(
                                "/document/DocxToPdf.pdf"));    
                     we.close();
                }
    
    
    
                Document document = new Document();
                PdfWriter.getInstance(document, fileForPdf);
    
                document.open();
    
                document.add(new Paragraph(k));
    
                document.close();
                fileForPdf.close();
    
    
    
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
    这对我很有用:-

    资料来源:


    作为对 Kushagra 答案的补充,下面是更新后的 Maven 依赖项:

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.converter</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.1</version>
        </dependency>
    
    
    
    package pdf;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.OutputStream;
    
    import org.apache.poi.xwpf.converter.pdf.PdfConverter;
    import org.apache.poi.xwpf.converter.pdf.PdfOptions;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    
    /**
     * Command-line tool that converts a {@code .docx} file to PDF with the
     * XDocReport XWPF-to-PDF converter.
     *
     * <p>NOTE(review): the imports above this class use the
     * {@code org.apache.poi.xwpf.converter.pdf} package, while the 2.0.1 artifacts
     * listed earlier are named {@code fr.opensagres.poi.xwpf.converter.pdf} —
     * confirm that the import package matches the dependency version in use.
     */
    public class PDF {

        /**
         * Converts {@code args[0]} to {@code args[1]} when exactly two arguments are
         * given; otherwise falls back to {@code D:/TEST.docx} and {@code D:/TEST.pdf}.
         *
         * @param args optional input path and output path
         * @throws Exception if the input cannot be read or the conversion fails
         */
        public static void main(String[] args) throws Exception {
            String inputFile = "D:/TEST.docx";
            String outputFile = "D:/TEST.pdf";
            if (args != null && args.length == 2) {
                inputFile = args[0];
                outputFile = args[1];
            }
            System.out.println("inputFile:" + inputFile + ",outputFile:" + outputFile);

            // Both streams are closed automatically, including when conversion throws.
            try (FileInputStream in = new FileInputStream(inputFile)) {
                XWPFDocument document = new XWPFDocument(in);

                // The output file is created only after the input was parsed.
                try (OutputStream out = new FileOutputStream(new File(outputFile))) {
                    // Explicit default options instead of passing null.
                    PdfOptions options = PdfOptions.create();
                    PdfConverter.getInstance().convert(document, out, options);
                }
            }
        }
    }
    
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.converter</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.1</version>
        </dependency>
    
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.13</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.13</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
        <version>LATEST</version>
    </dependency>
    
    /**
     * Downloads a {@code .docx} document, converts it to PDF in memory and returns
     * the PDF bytes as a Base64 string.
     *
     * <p>NOTE(review): the URL is opened as given; if it can come from an untrusted
     * caller, validate it before calling this method.
     *
     * @param url location of the Word document to convert
     * @return the generated PDF, Base64-encoded
     * @throws Exception if the URL cannot be opened or the conversion fails
     */
    public String wordToPDFPOI(String url) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        // The remote stream was previously never closed; scope it to the conversion.
        try (InputStream doc = new URL(url).openStream()) {
            XWPFDocument document = new XWPFDocument(doc);
            PdfOptions options = PdfOptions.create();
            PdfConverter.getInstance().convert(document, baos, options);
        }

        return Base64.encodeBytes(baos.toByteArray());
    }