Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/326.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何在java中读取Doc或Docx文件?_Java_Doc - Fatal编程技术网

如何在java中读取Doc或Docx文件?

如何在java中读取Doc或Docx文件?,java,doc,Java,Doc,我想用java读一个word文件 import org.apache.poi.poifs.filesystem.*; import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hwpf.*; import org.apache.poi.hwpf.extractor.*; import org.apache.poi.hwpf.usermodel.HeaderStories; import java.i

我想用java读一个word文件

import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;
import org.apache.poi.hwpf.usermodel.HeaderStories;

import java.io.*;

public class ReadDocFileFromJava {

    public static void main(String[] args) {
        /**This is the document that you want to read using Java.**/
        String fileName = "C:\\Path to file\\Test.doc";

        /**Method call to read the document (demonstrate some useage of POI)**/
        readMyDocument(fileName);

    }
    public static void readMyDocument(String fileName){
        POIFSFileSystem fs = null;
        try {
            fs = new POIFSFileSystem(new FileInputStream(fileName));
            HWPFDocument doc = new HWPFDocument(fs);

            /** Read the content **/
            readParagraphs(doc);

            int pageNumber=1;

            /** We will try reading the header for page 1**/
            readHeader(doc, pageNumber);

            /** Let's try reading the footer for page 1**/
            readFooter(doc, pageNumber);

            /** Read the document summary**/
            readDocumentSummary(doc);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }  

    public static void readParagraphs(HWPFDocument doc) throws Exception{
        WordExtractor we = new WordExtractor(doc);

        /**Get the total number of paragraphs**/
        String[] paragraphs = we.getParagraphText();
        System.out.println("Total Paragraphs: "+paragraphs.length);

        for (int i = 0; i < paragraphs.length; i++) {

            System.out.println("Length of paragraph "+(i +1)+": "+ paragraphs[i].length());
            System.out.println(paragraphs[i].toString());

        }

    }

    public static void readHeader(HWPFDocument doc, int pageNumber){
        HeaderStories headerStore = new HeaderStories( doc);
        String header = headerStore.getHeader(pageNumber);
        System.out.println("Header Is: "+header);

    }

    public static void readFooter(HWPFDocument doc, int pageNumber){
        HeaderStories headerStore = new HeaderStories( doc);
        String footer = headerStore.getFooter(pageNumber);
        System.out.println("Footer Is: "+footer);

    }

    public static void readDocumentSummary(HWPFDocument doc) {
        DocumentSummaryInformation summaryInfo=doc.getDocumentSummaryInformation();
        String category = summaryInfo.getCategory();
        String company = summaryInfo.getCompany();
        int lineCount=summaryInfo.getLineCount();
        int sectionCount=summaryInfo.getSectionCount();
        int slideCount=summaryInfo.getSlideCount();


    enter code here
        System.out.println("---------------------------");
        System.out.println("Category: "+category);
        System.out.println("Company: "+company);
        System.out.println("Line Count: "+lineCount);
        System.out.println("Section Count: "+sectionCount);
        System.out.println("Slide Count: "+slideCount);

    }

}


我想在Java中读取doc或docx文件

以下是ReadDoc/docx.Java的代码:这将读取doc/docx文件并将其内容打印到控制台。你可以自己定制。

import java.io.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;

/**
 * Reads a legacy Word ({@code .doc}) file with Apache POI's HWPF API and prints
 * each paragraph of its text to standard output.
 */
public class ReadDocFile
{
    /**
     * Opens the hard-coded document, extracts its paragraph text and prints every
     * non-null paragraph. Any exception raised while reading is caught and its
     * stack trace is printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        File file = new File("c:\\New.doc");

        // try-with-resources releases the file handle even if parsing throws.
        try (FileInputStream fis = new FileInputStream(file.getAbsolutePath()))
        {
            HWPFDocument document = new HWPFDocument(fis);
            WordExtractor extractor = new WordExtractor(document);
            String[] fileData = extractor.getParagraphText();
            for (String paragraph : fileData)
            {
                if (paragraph != null)
                {
                    System.out.println(paragraph);
                }
            }
        }
        catch (Exception exep)
        {
            exep.printStackTrace();
        }
    }
}
import java.io.*;
导入org.apache.poi.hwpf.hwpf文档;
导入org.apache.poi.hwpf.extractor.WordExtractor;
公共类ReadDocFile
{
公共静态void main(字符串[]args)
{
File=null;
WordExtractor-extractor=null;
尝试
{
文件=新文件(“c:\\new.doc”);
FileInputStream fis=新的FileInputStream(file.getAbsolutePath());
HWPF文件=新的HWPF文件(fis);
提取器=新单词提取器(文档);
String[]fileData=extractor.getParagraphText();
对于(int i=0;i
您在这里真的没有问任何问题。如果没有进一步的细节,这个问题很可能就结束了。您的目标是什么(查看、处理、编辑、打印)?到目前为止你试过什么?什么不起作用?是否有错误?您可能希望对该异常执行某些操作。在这种情况下,word extractor只能提供文档文件的文本。它甚至没有提到段落的开始或结束位置……除了文本(图像、条形码等)外,内容如何。您能进一步编辑代码以读取完整数据吗?有没有办法通过此方法通过偏移量了解页码?此方法仅适用于2007年以前的Office。