Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/haskell/10.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 无法按Apache POI中word文档(docx)的顺序读取所有内容_Java_Ms Word_Apache Poi - Fatal编程技术网

Java 无法按Apache POI中word文档(docx)的顺序读取所有内容

Java 无法按Apache POI中word文档(docx)的顺序读取所有内容,java,ms-word,apache-poi,Java,Ms Word,Apache Poi,我一直试图阅读word文档中的所有内容(包括表格、图片、段落)。我可以使用getBodyElementsIterator()阅读表格和段落,但它不能阅读文档中的图片。 虽然我可以使用getAllPictures()单独阅读图片,但我需要按顺序阅读所有内容 在getBodyElementsIterator()内循环时,我尝试查找XWPFPicture实例,但找不到任何映像实例 Iterator<IBodyElement> iter = xdoc.getBodyElementsItera

我一直试图阅读word文档中的所有内容(包括表格、图片、段落)。我可以使用getBodyElementsIterator()阅读表格和段落,但它不能阅读文档中的图片。 虽然我可以使用getAllPictures()单独阅读图片,但我需要按顺序阅读所有内容

在 getBodyElementsIterator() 的循环中，我尝试查找 XWPFPicture 的实例，但找不到任何图片实例。

// Walk the body elements returned by the document's iterator and print which kind each one is.
Iterator<IBodyElement> iter = xdoc.getBodyElementsIterator();
           while (iter.hasNext()) {
               IBodyElement elem = iter.next();
               if (elem instanceof XWPFParagraph) {
                  System.out.println("para - "+elem.getClass());
               } else if (elem instanceof XWPFTable) {
                  System.out.println("table - "+elem);
               // NOTE(review): per the asker's report this branch is never taken - the iterator
               // only yields paragraphs and tables. The answer's code reaches pictures through
               // each run's getEmbeddedPictures() instead.
               } else if (elem instanceof XWPFPictureData){
                  System.out.println("picture - "+elem);
               } else {
                  // Anything that is neither a paragraph, a table, nor picture data.
                  System.out.println("else - "+elem);
               }  
            }

它包含段落和表格,但没有任何图片

如评论中所述，“如何在 Apache POI 中按 word 文档（docx）的顺序读取所有内容”这个问题过于宽泛，无法在这里完整回答。`*.docx` 是 Office Open XML 文件格式的 ZIP 存档文件。它包含保存文档正文的 `document.xml`。这是相当复杂的 XML，需要逐层遍历。此外，`document.xml` 还可能包含对 `*.docx` ZIP 存档中其他资源的引用，这些资源同样需要遍历。

我能提供的是这个遍历过程的一个模板。它从 `XWPFDocument` 开始，首先遍历其中所有的 `IBodyElement`。然后根据找到的 `IBodyElement` 的类型，继续向下遍历。

import java.io.FileInputStream;

import org.apache.poi.xwpf.usermodel.*;

import java.util.List;

/**
 * Template for walking the content of a Word (*.docx) document with Apache POI XWPF.
 *
 * <p>Starting from the document's body elements, each element is printed and then
 * descended into: paragraphs into their runs, runs into their embedded pictures,
 * tables into rows, rows into cells, and cells back into body elements.
 *
 * <p>This is a working draft rather than a complete reader: the content of structured
 * document tags (SDT) is printed but not traversed any further.
 */
public class WordReadAllContent {

  /** Input file used when no path is passed on the command line. */
  private static final String DEFAULT_IN_FILE_PATH = "./WordDocument.docx";

  /**
   * Prints every picture of a run together with its picture data.
   *
   * @param pictures the pictures embedded in one run
   * @throws Exception declared for uniformity with the other traversal methods
   */
  static void traversePictures(List<XWPFPicture> pictures) throws Exception {
    for (XWPFPicture picture : pictures) {
      System.out.println(picture);
      XWPFPictureData pictureData = picture.getPictureData();
      System.out.println(pictureData);
    }
  }

  /**
   * Prints every run element of a paragraph and descends into the pictures embedded in it.
   *
   * <p>Field runs and hyperlink runs are tested before plain runs so that each kind keeps
   * its own branch and can be extended separately.
   *
   * @param runElements the run elements of one paragraph
   * @throws Exception if traversing the embedded pictures fails
   */
  static void traverseRunElements(List<IRunElement> runElements) throws Exception {
    for (IRunElement runElement : runElements) {
      if (runElement instanceof XWPFFieldRun) {
        XWPFFieldRun fieldRun = (XWPFFieldRun) runElement;
        System.out.println(fieldRun.getClass().getName());
        System.out.println(fieldRun);
        traversePictures(fieldRun.getEmbeddedPictures());
      } else if (runElement instanceof XWPFHyperlinkRun) {
        XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) runElement;
        System.out.println(hyperlinkRun.getClass().getName());
        System.out.println(hyperlinkRun);
        traversePictures(hyperlinkRun.getEmbeddedPictures());
      } else if (runElement instanceof XWPFRun) {
        XWPFRun run = (XWPFRun) runElement;
        System.out.println(run.getClass().getName());
        System.out.println(run);
        traversePictures(run.getEmbeddedPictures());
      } else if (runElement instanceof XWPFSDT) {
        XWPFSDT sDT = (XWPFSDT) runElement;
        System.out.println(sDT);
        System.out.println(sDT.getContent());
        //ToDo: The SDT may have traversable content too.
      }
    }
  }

  /**
   * Prints every cell of a table row and descends into the body elements of regular cells.
   *
   * @param tableICells the cells of one table row
   * @throws Exception if traversing a cell's body elements fails
   */
  static void traverseTableCells(List<ICell> tableICells) throws Exception {
    for (ICell tableICell : tableICells) {
      if (tableICell instanceof XWPFSDTCell) {
        XWPFSDTCell sDTCell = (XWPFSDTCell) tableICell;
        System.out.println(sDTCell);
        //ToDo: The SDTCell may have traversable content too.
      } else if (tableICell instanceof XWPFTableCell) {
        XWPFTableCell tableCell = (XWPFTableCell) tableICell;
        System.out.println(tableCell);
        // A cell holds body elements of its own, so the traversal recurses here.
        traverseBodyElements(tableCell.getBodyElements());
      }
    }
  }

  /**
   * Prints every row of a table and descends into its cells.
   *
   * @param tableRows the rows of one table
   * @throws Exception if traversing the cells fails
   */
  static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
    for (XWPFTableRow tableRow : tableRows) {
      System.out.println(tableRow);
      traverseTableCells(tableRow.getTableICells());
    }
  }

  /**
   * Prints every body element in list order and descends into paragraphs and tables.
   *
   * @param bodyElements the body elements of a document or of a table cell
   * @throws Exception if traversing a nested element fails
   */
  static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
    for (IBodyElement bodyElement : bodyElements) {
      if (bodyElement instanceof XWPFParagraph) {
        XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
        System.out.println(paragraph);
        traverseRunElements(paragraph.getIRuns());
      } else if (bodyElement instanceof XWPFSDT) {
        XWPFSDT sDT = (XWPFSDT) bodyElement;
        System.out.println(sDT);
        System.out.println(sDT.getContent());
        //ToDo: The SDT may have traversable content too.
      } else if (bodyElement instanceof XWPFTable) {
        XWPFTable table = (XWPFTable) bodyElement;
        System.out.println(table);
        traverseTableRows(table.getRows());
      }
    }
  }

  /**
   * Opens a *.docx file and prints its content by traversing the body elements.
   *
   * @param args optional; {@code args[0]} is the path of the file to read, otherwise
   *     {@code ./WordDocument.docx} is used
   * @throws Exception if the file cannot be opened or read
   */
  public static void main(String[] args) throws Exception {

    String inFilePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IN_FILE_PATH;

    // try-with-resources closes both the stream and the document even when the
    // traversal throws; the explicit close() at the end of the method did not.
    try (FileInputStream in = new FileInputStream(inFilePath);
        XWPFDocument document = new XWPFDocument(in)) {
      traverseBodyElements(document.getBodyElements());
    }
  }

}
import java.io.FileInputStream;
import org.apache.poi.xwpf.usermodel.*;
import java.util.List;
public class WordReadAllContent {
 static void traversePictures(List<XWPFPicture> pictures) throws Exception {
  for (XWPFPicture picture : pictures) {
   System.out.println(picture);
   XWPFPictureData pictureData = picture.getPictureData();
   System.out.println(pictureData);
  }
 }
 static void traverseRunElements(List<IRunElement> runElements) throws Exception {
  for (IRunElement runElement : runElements) {
   if (runElement instanceof XWPFFieldRun) {
    XWPFFieldRun fieldRun = (XWPFFieldRun)runElement;
    System.out.println(fieldRun.getClass().getName());
    System.out.println(fieldRun);
    traversePictures(fieldRun.getEmbeddedPictures());
   } else if (runElement instanceof XWPFHyperlinkRun) {
    XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun)runElement;
    System.out.println(hyperlinkRun.getClass().getName());
    System.out.println(hyperlinkRun);
    traversePictures(hyperlinkRun.getEmbeddedPictures());
   } else if (runElement instanceof XWPFRun) {
    XWPFRun run = (XWPFRun)runElement;
    System.out.println(run.getClass().getName());
    System.out.println(run);
    traversePictures(run.getEmbeddedPictures());
   } else if (runElement instanceof XWPFSDT) {
    XWPFSDT sDT = (XWPFSDT)runElement;
    System.out.println(sDT);
    System.out.println(sDT.getContent());
    //ToDo: The SDT may have traversable content too.
   }
  }
 }
 static void traverseTableCells(List<ICell> tableICells) throws Exception {
  for (ICell tableICell : tableICells) {
   if (tableICell instanceof XWPFSDTCell) {
    XWPFSDTCell sDTCell = (XWPFSDTCell)tableICell;
    System.out.println(sDTCell);
    //ToDo: The SDTCell may have traversable content too.
   } else if (tableICell instanceof XWPFTableCell) {
    XWPFTableCell tableCell = (XWPFTableCell)tableICell;
    System.out.println(tableCell);
    traverseBodyElements(tableCell.getBodyElements());
   }
  }
 }
 static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
  for (XWPFTableRow tableRow : tableRows) {
   System.out.println(tableRow);
   traverseTableCells(tableRow.getTableICells());
  }
 }
 static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
  for (IBodyElement bodyElement : bodyElements) {
   if (bodyElement instanceof XWPFParagraph) {
    XWPFParagraph paragraph = (XWPFParagraph)bodyElement;
    System.out.println(paragraph);
    traverseRunElements(paragraph.getIRuns());
   } else if (bodyElement instanceof XWPFSDT) {
    XWPFSDT sDT = (XWPFSDT)bodyElement;
    System.out.println(sDT);
    System.out.println(sDT.getContent());
    //ToDo: The SDT may have traversable content too.
   } else if (bodyElement instanceof XWPFTable) {
    XWPFTable table = (XWPFTable)bodyElement;
    System.out.println(table);
    traverseTableRows(table.getRows());
   }
  }
 }
 public static void main(String[] args) throws Exception {
  String inFilePath = "./WordDocument.docx";
  XWPFDocument document = new XWPFDocument(new FileInputStream(inFilePath));
  traverseBodyElements(document.getBodyElements());
  document.close();
 }
}

这是一份工作草案。我肯定,我忘了什么。

这个问题在这里很难回答。其他人围绕“在ApachePOI中按word文档(docx)的顺序读取所有内容”的要求对整个库进行了编程。看,我可以给你一些解释,为什么图片不能像表格一样处理。图片不是document.xml的一部分。文档中的所有内容都是对图片数据的引用,这些图片数据存储在其他地方,在zip包中的不同xml文件中。像OpenXMLSDK这样的库提供了对整个zip包的访问,它可以处理您的请求,但需要更多的编码。显然,还有其他库可以按照您的要求执行^^。但我想你可能想理解“为什么”…@Cindy Meister:
Apache POI 同样“提供对整个 ZIP 包的访问”。 @AxelRichter：Good to know :-) 有时候，当我阅读评论时，我不清楚它是否能够触及所有的小角落… 很好的例子，但 BodyElement 并不包含某些元素，例如批注或脚注。 @Ermintar：没错，但我已经说过了：这是一份工作草案，我肯定遗漏了一些东西。这只是一个起点，你当然可以自由地改进和扩展它。
import java.io.FileInputStream;

import org.apache.poi.xwpf.usermodel.*;

import java.util.List;

/**
 * Template for walking the content of a Word (*.docx) document with Apache POI XWPF.
 *
 * <p>Starting from the document's body elements, each element is printed and then
 * descended into: paragraphs into their runs, runs into their embedded pictures,
 * tables into rows, rows into cells, and cells back into body elements.
 *
 * <p>This is a working draft rather than a complete reader: the content of structured
 * document tags (SDT) is printed but not traversed any further.
 */
public class WordReadAllContent {

  /** Input file used when no path is passed on the command line. */
  private static final String DEFAULT_IN_FILE_PATH = "./WordDocument.docx";

  /**
   * Prints every picture of a run together with its picture data.
   *
   * @param pictures the pictures embedded in one run
   * @throws Exception declared for uniformity with the other traversal methods
   */
  static void traversePictures(List<XWPFPicture> pictures) throws Exception {
    for (XWPFPicture picture : pictures) {
      System.out.println(picture);
      XWPFPictureData pictureData = picture.getPictureData();
      System.out.println(pictureData);
    }
  }

  /**
   * Prints every run element of a paragraph and descends into the pictures embedded in it.
   *
   * <p>Field runs and hyperlink runs are tested before plain runs so that each kind keeps
   * its own branch and can be extended separately.
   *
   * @param runElements the run elements of one paragraph
   * @throws Exception if traversing the embedded pictures fails
   */
  static void traverseRunElements(List<IRunElement> runElements) throws Exception {
    for (IRunElement runElement : runElements) {
      if (runElement instanceof XWPFFieldRun) {
        XWPFFieldRun fieldRun = (XWPFFieldRun) runElement;
        System.out.println(fieldRun.getClass().getName());
        System.out.println(fieldRun);
        traversePictures(fieldRun.getEmbeddedPictures());
      } else if (runElement instanceof XWPFHyperlinkRun) {
        XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) runElement;
        System.out.println(hyperlinkRun.getClass().getName());
        System.out.println(hyperlinkRun);
        traversePictures(hyperlinkRun.getEmbeddedPictures());
      } else if (runElement instanceof XWPFRun) {
        XWPFRun run = (XWPFRun) runElement;
        System.out.println(run.getClass().getName());
        System.out.println(run);
        traversePictures(run.getEmbeddedPictures());
      } else if (runElement instanceof XWPFSDT) {
        XWPFSDT sDT = (XWPFSDT) runElement;
        System.out.println(sDT);
        System.out.println(sDT.getContent());
        //ToDo: The SDT may have traversable content too.
      }
    }
  }

  /**
   * Prints every cell of a table row and descends into the body elements of regular cells.
   *
   * @param tableICells the cells of one table row
   * @throws Exception if traversing a cell's body elements fails
   */
  static void traverseTableCells(List<ICell> tableICells) throws Exception {
    for (ICell tableICell : tableICells) {
      if (tableICell instanceof XWPFSDTCell) {
        XWPFSDTCell sDTCell = (XWPFSDTCell) tableICell;
        System.out.println(sDTCell);
        //ToDo: The SDTCell may have traversable content too.
      } else if (tableICell instanceof XWPFTableCell) {
        XWPFTableCell tableCell = (XWPFTableCell) tableICell;
        System.out.println(tableCell);
        // A cell holds body elements of its own, so the traversal recurses here.
        traverseBodyElements(tableCell.getBodyElements());
      }
    }
  }

  /**
   * Prints every row of a table and descends into its cells.
   *
   * @param tableRows the rows of one table
   * @throws Exception if traversing the cells fails
   */
  static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
    for (XWPFTableRow tableRow : tableRows) {
      System.out.println(tableRow);
      traverseTableCells(tableRow.getTableICells());
    }
  }

  /**
   * Prints every body element in list order and descends into paragraphs and tables.
   *
   * @param bodyElements the body elements of a document or of a table cell
   * @throws Exception if traversing a nested element fails
   */
  static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
    for (IBodyElement bodyElement : bodyElements) {
      if (bodyElement instanceof XWPFParagraph) {
        XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
        System.out.println(paragraph);
        traverseRunElements(paragraph.getIRuns());
      } else if (bodyElement instanceof XWPFSDT) {
        XWPFSDT sDT = (XWPFSDT) bodyElement;
        System.out.println(sDT);
        System.out.println(sDT.getContent());
        //ToDo: The SDT may have traversable content too.
      } else if (bodyElement instanceof XWPFTable) {
        XWPFTable table = (XWPFTable) bodyElement;
        System.out.println(table);
        traverseTableRows(table.getRows());
      }
    }
  }

  /**
   * Opens a *.docx file and prints its content by traversing the body elements.
   *
   * @param args optional; {@code args[0]} is the path of the file to read, otherwise
   *     {@code ./WordDocument.docx} is used
   * @throws Exception if the file cannot be opened or read
   */
  public static void main(String[] args) throws Exception {

    String inFilePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IN_FILE_PATH;

    // try-with-resources closes both the stream and the document even when the
    // traversal throws; the explicit close() at the end of the method did not.
    try (FileInputStream in = new FileInputStream(inFilePath);
        XWPFDocument document = new XWPFDocument(in)) {
      traverseBodyElements(document.getBodyElements());
    }
  }

}