使用java读取word文档

使用java读取word文档,java,apache-poi,docx,Java,Apache Poi,Docx,我正在尝试使用java从docx文件读取数据。数据是表。是否有方法遍历表格单元格并提取单元格数据?这将对您有所帮助 以上是使用ApachePOI实现的 以下是链接中的代码: package org.apache.poi.xwpf.usermodel; import java.io.FileOutputStream; import java.math.BigInteger; import java.util.List; import org.openxmlformats.schema

我正在尝试使用 Java 从 docx 文件中读取数据,这些数据以表格形式存放。是否有办法遍历表格的单元格并提取每个单元格中的数据?

这将对您有所帮助

该示例是使用 Apache POI(XWPF 组件)实现的。

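以下是链接中的代码(注意:该示例演示的是如何创建表格;若要读取已有文档中的表格,可以用 new XWPFDocument(new FileInputStream("文件.docx")) 打开文档,再通过 doc.getTables()、table.getRows()、row.getTableCells() 逐层遍历,并用 cell.getText() 取得单元格文本):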

    package org.apache.poi.xwpf.usermodel;

import java.io.FileOutputStream;
import java.math.BigInteger;
import java.util.List;

import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHeight;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTShd;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTrPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTVerticalJc;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STShd;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalJc;

/**
 * This program creates a simple WordprocessingML table using POI XWPF API, and
 * a more complex, styled table using both XWPF and ooxml-schema. It's possible
 * that not all referenced wordprocessingml classes are defined in
 * poi-ooxml-schemas-3.8-beta4. If this is the case, you'll need to use the full
 * ooxml-schemas.jar library.
 *
 * <p>Both examples write their result into the current working directory
 * ({@code simpleTable.docx} and {@code styledTable.docx}).
 *
 * @author gisella bronzetti (original)
 * @author Gregg Morris (styled table)
 */
public class SimpleTable {

    /** Row height in twentieths of a point; 360 = 0.25". */
    private static final long ROW_HEIGHT = 360;

    /** Background fill (hex RGB) of the header row. */
    private static final String HEADER_FILL = "A7BFDE";

    /** Background fill (hex RGB) of even-numbered body rows. */
    private static final String EVEN_ROW_FILL = "D3DFEE";

    /** Background fill (hex RGB) of odd-numbered body rows. */
    private static final String ODD_ROW_FILL = "EDF2F8";

    /**
     * Runs both examples in order. A failure in either one is reported on
     * standard output and then rethrown, so the second example is not
     * attempted when the first one fails.
     *
     * @param args ignored
     * @throws Exception whatever the failing example threw
     */
    public static void main(String[] args) throws Exception {
        try {
            createSimpleTable();
        }
        catch(Exception e) {
            System.out.println("Error trying to create simple table.");
            throw(e);
        }
        try {
            createStyledTable();
        }
        catch(Exception e) {
            System.out.println("Error trying to create styled table.");
            throw(e);
        }
    }

    /**
     * Creates a 3x3 table with text in three of its cells and writes the
     * document to {@code simpleTable.docx}.
     *
     * @throws Exception if the document cannot be built or written
     */
    public static void createSimpleTable() throws Exception {
        XWPFDocument doc = new XWPFDocument();

        XWPFTable table = doc.createTable(3, 3);

        table.getRow(1).getCell(1).setText("EXAMPLE OF TABLE");

        // table cells have a list of paragraphs; there is an initial
        // paragraph created when the cell is created. If you create a
        // paragraph in the document to put in the cell, it will also
        // appear in the document following the table, which is probably
        // not the desired result. So reuse the cell's own first paragraph.
        XWPFParagraph p1 = table.getRow(0).getCell(0).getParagraphs().get(0);

        XWPFRun r1 = p1.createRun();
        r1.setBold(true);
        r1.setText("The quick brown fox");
        r1.setItalic(true);
        r1.setFontFamily("Courier");
        r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
        r1.setTextPosition(100);

        table.getRow(2).getCell(2).setText("only text");

        writeDocument(doc, "simpleTable.docx");
    }

    /**
     * Create a table with some row and column styling. The style name is
     * "manually" added to the table without checking whether the style
     * actually exists in the document. Since the document is created from
     * scratch, it won't exist; according to the original author, MS Word then
     * shows the table style as "Normal".
     * Alternating row colors are set by hand. This could be done using Themes,
     * but that's left as an exercise for the reader. The cells in the last
     * column of the table have 10pt. "Courier" font.
     * The original author makes no claims that this is the "right" way to do
     * it, only that it worked and may give ideas for your own solutions.
     *
     * <p>The result is written to {@code styledTable.docx}.
     *
     * @throws Exception if the document cannot be built or written
     */
    public static void createStyledTable() throws Exception {
        // Create a new document from scratch
        XWPFDocument doc = new XWPFDocument();
        // -- OR --
        // open an existing empty document with styles already defined
        //XWPFDocument doc = new XWPFDocument(new FileInputStream("base_document.docx"));

        // Create a new table with 6 rows and 3 columns
        int nRows = 6;
        int nCols = 3;
        XWPFTable table = doc.createTable(nRows, nCols);

        // Set the table style. If the style is not defined, the table style
        // will become "Normal".
        CTTblPr tblPr = table.getCTTbl().getTblPr();
        CTString styleStr = tblPr.addNewTblStyle();
        styleStr.setVal("StyledTable");

        List<XWPFTableRow> rows = table.getRows();
        for (int rowIdx = 0; rowIdx < rows.size(); rowIdx++) {
            XWPFTableRow row = rows.get(rowIdx);
            boolean headerRow = (rowIdx == 0);

            // add a table row properties element (trPr) and set the row height
            CTTrPr trPr = row.getCtRow().addNewTrPr();
            CTHeight ht = trPr.addNewTrHeight();
            ht.setVal(BigInteger.valueOf(ROW_HEIGHT));

            List<XWPFTableCell> cells = row.getTableCells();
            for (int colIdx = 0; colIdx < cells.size(); colIdx++) {
                XWPFTableCell cell = cells.get(colIdx);

                // add a table cell properties element (tcPr)
                CTTcPr tcpr = cell.getCTTc().addNewTcPr();
                // set vertical alignment to "center"
                CTVerticalJc va = tcpr.addNewVAlign();
                va.setVal(STVerticalJc.CENTER);

                // cell shading: header / even / odd rows get different fills
                CTShd ctshd = tcpr.addNewShd();
                ctshd.setColor("auto");
                ctshd.setVal(STShd.CLEAR);
                ctshd.setFill(fillForRow(rowIdx));

                // reuse the paragraph that was created together with the cell
                XWPFParagraph para = cell.getParagraphs().get(0);
                // create a run to contain the content
                XWPFRun rh = para.createRun();
                if (colIdx == nCols - 1) {
                    // last column is 10pt Courier
                    rh.setFontSize(10);
                    rh.setFontFamily("Courier");
                }
                if (headerRow) {
                    rh.setText("header row, col " + colIdx);
                    rh.setBold(true);
                    para.setAlignment(ParagraphAlignment.CENTER);
                }
                else {
                    // even and odd body rows carry the same text and alignment
                    rh.setText("row " + rowIdx + ", col " + colIdx);
                    para.setAlignment(ParagraphAlignment.LEFT);
                }
            } // for cell
        } // for row

        writeDocument(doc, "styledTable.docx");
    }

    /**
     * Picks the background fill for a row of the styled table.
     *
     * @param rowIdx zero-based row index; row 0 is the header row
     * @return the hex RGB fill color for that row
     */
    private static String fillForRow(int rowIdx) {
        if (rowIdx == 0) {
            return HEADER_FILL;
        }
        return (rowIdx % 2 == 0) ? EVEN_ROW_FILL : ODD_ROW_FILL;
    }

    /**
     * Writes the document to the given file. The stream is opened in a
     * try-with-resources statement so that it is closed even when
     * {@code doc.write} throws.
     *
     * @param doc      the document to serialize
     * @param fileName path of the file to create or overwrite
     * @throws Exception if the file cannot be opened or written
     */
    private static void writeDocument(XWPFDocument doc, String fileName) throws Exception {
        try (FileOutputStream out = new FileOutputStream(fileName)) {
            doc.write(out);
        }
    }

}
package org.apache.poi.xwpf.usermodel;
导入java.io.FileOutputStream;
导入java.math.biginger;
导入java.util.List;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHeight;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTShd;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblPr;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTrPr;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.CTVerticalJc;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.STShd;
导入org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalJc;
/**
*该程序使用POI XWPF API创建一个简单的WordprocessingML表,并
*使用XWPF和ooxml模式的更复杂、样式化的表。这是可能的
*并非所有引用的wordprocessingml类都在中定义
*poi-ooxml-schemas-3.8-beta4。如果是这种情况,您需要使用完整的
*ooxml-schemas.jar库。
*
*@作者吉塞拉·布朗泽蒂(原件)
*@作者格雷格·莫里斯(风格表)
*/
公共类可简化{
公共静态void main(字符串[]args)引发异常{
试一试{
createSimpleTable();
}
捕获(例外e){
System.out.println(“尝试创建简单表时出错”);
投掷(e);
}
试一试{
createStyledTable();
}
捕获(例外e){
System.out.println(“尝试创建样式表时出错”);
投掷(e);
}
}
public static void createSimpleTable()引发异常{
XWPFDocument doc=新XWPFDocument();
XWPFTable table=doc.createTable(3,3);
table.getRow(1).getCell(1).setText(“表的示例”);
//表格单元格有一个段落列表;有一个首字母
//创建单元格时创建的段落。如果创建
//将文件中的段落放在单元格中,它也将
//出现在表格后面的文档中,可能是
//不是预期的结果。
XWPFParagraph p1=table.getRow(0.getCell(0.getParagraph().get(0);
XWPFRun r1=p1.createRun();
r1.挫折(真);
r1.setText(“敏捷的棕色狐狸”);
r1.设置斜体(真);
r1.setFontFamily(“信使”);
r1.设置下划线(下划线模式.点划线);
r1.设置位置(100);
表.getRow(2).getCell(2).setText(“仅文本”);
FileOutputStream out=新的FileOutputStream(“simpleTable.docx”);
写(出)文件;
out.close();
}
/**
*创建一个具有一些行和列样式的表
*将样式名称添加到表中,但不要检查样式是否实际存在
*存在于文档中。因为我是从头开始创建的,所以
*将不存在。在MS Word中打开时,表格样式将变为“正常”。
*我手动设置交替行颜色。这可以使用主题来完成,
*但这是留给读者的一个练习。最后
*表格的列有10磅的“Courier”字体。
*我没有声称这是一种“正确”的方法,但它奏效了
*对我来说,考虑到XWPF示例的稀缺性,我认为这可能证明
*具有启发性,并为您自己的解决方案提供想法。
*@抛出异常
*/
public static void createStyledTable()引发异常{
//从头开始创建新文档
XWPFDocument doc=新XWPFDocument();
//或者--
//打开已定义样式的现有空文档
//XWPFDocument doc=新的XWPFDocument(新文件输入流(“base_document.docx”);
//创建一个包含6行3列的新表
int nRows=6;
int nCols=3;
XWPFTable table=doc.createTable(nRows,nCols);
//设置表格样式。如果未定义样式,则表格样式
//将变得“正常”。
CTTblPr tblPr=table.getCTTbl().getTblPr();
CTString styleStr=tblPr.addnewtbstyle();
styleStr.setVal(“StyledTable”);
//获取表中的行列表
列表行=table.getRows();
int-rowCt=0;
int colCt=0;
用于(XWPFTableRow行:行){
//获取表行属性(trPr)
CTTrPr trPr=row.getCtRow().addnewtrp();
//设置行高;单位=点的二十分之一,360=0.25“
CTHeight ht=trPr.addnewthheight();
ht.setVal(biginger.valueOf(360));
//获取这一行中的单元格
列表单元格=行。getTableCells();
//向每个单元格添加内容
用于(XWPFTableCell单元:单元){
//获取表格单元格属性元素(tcPr)
CTTcPr tcpr=cell.getCTTc().addNewTcPr();
//将垂直对齐设置为“中心”
CTVerticalJc va=tcpr.addNewVAlign();
va.setVal(STVerticalJc.CENTER);
//创建单元格颜色元素
CTShd CTShd=tcpr.addNewShd();
ctshd.setColor(“自动”);
ctshd.setVal(STShd.CLEAR);
如果(rowCt==0){
//标题行
ctshd.setFill(“A7BFDE”);
}
否则如果(行%2==0){
//偶数行
ctshd.setFill(“D3DFEE”);
}
否则{
//奇数行