Java 如何避免跳过ApachePOI中的空白行或空白列
当我使用Apace POI解析文件时,将跳过空行,并返回包含非空行的字符串数组列表 我如何告诉API不要跳过读取空行或空列 要读取的代码以某种方式跳过了没有数据的行Java 如何避免跳过ApachePOI中的空白行或空白列,java,apache-poi,Java,Apache Poi,当我使用Apace POI解析文件时,将跳过空行,并返回包含非空行的字符串数组列表 我如何告诉API不要跳过读取空行或空列 要读取的代码以某种方式跳过了没有数据的行 XExcelFileReader fileReader = new XExcelFileReader(excelByteArray, sheetFromWhichToRead); dataFromFile = fileReader.readRows(); 用于从类XExcelFileRea
XExcelFileReader fileReader = new XExcelFileReader(excelByteArray, sheetFromWhichToRead);
dataFromFile = fileReader.readRows();
用于从类XExcelFileReader读取数据。变量dataFromFile是字符串数组的列表
这是读取数据行的代码:
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
* This class is responsible for reading the row contents for a given byte array and sheet name
*/
public class XExcelFileReader {
/**
* This is the Row Num of the current row that was read
*/
private int rowNum = 0;
/**
* The OPCPackage is the package used to laod the Input Stream used for only .xlsx Files
*/
private OPCPackage opcPkg;
/**
* These are the String Tables that are read from the Excel File
*/
private ReadOnlySharedStringsTable stringsTable;
/**
* The XML Streaming API will be used
*/
private XMLStreamReader xmlReader;
/** The styles table which has formatting information about cells. */
private StylesTable styles;
/**
* @param excelByteArray the excel byte array
* @param sheetFromWhichToRead the excel sheet from which to read
* @throws Exception the exception
*/
public XExcelFileReader(final byte[] excelByteArray, final String sheetFromWhichToRead) throws Exception {
InputStream excelStream = new ByteArrayInputStream(excelByteArray);
opcPkg = OPCPackage.open(excelStream);
this.stringsTable = new ReadOnlySharedStringsTable(opcPkg);
XSSFReader xssfReader = new XSSFReader(opcPkg);
styles = xssfReader.getStylesTable();
XMLInputFactory factory = XMLInputFactory.newInstance();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
InputStream inputStream = null;
while (iter.hasNext()) {
inputStream = iter.next();
String tempSheetName = iter.getSheetName();
if (StringUtils.isNotEmpty(tempSheetName)) {
tempSheetName = tempSheetName.trim();
if (tempSheetName.equals(sheetFromWhichToRead)) {
break;
}
}
}
xmlReader = factory.createXMLStreamReader(inputStream);
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("sheetData")) {
break;
}
}
}
}
/**
* @return rowNum
*/
public final int rowNum() {
return rowNum;
}
/**
* @return List<String[]> List of String array which can hold the content
* @throws XMLStreamException the XMLStreamException
*/
public final List<String[]> readRows() throws XMLStreamException {
String elementName = "row";
List<String[]> dataRows = new ArrayList<String[]>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals(elementName)) {
rowNum++;
dataRows.add(getDataRow());
// TODO need to see if the batch Size is required
// if (dataRows.size() == batchSize)
// break;
}
}
}
return dataRows;
}
/**
* @return String [] of Row Data
* @throws XMLStreamException the XMLStreamException
*/
private String[] getDataRow() throws XMLStreamException {
List<String> rowValues = new ArrayList<String>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("c")) {
CellReference cellReference = new CellReference(xmlReader.getAttributeValue(null, "r"));
// Fill in the possible blank cells!
while (rowValues.size() < cellReference.getCol()) {
rowValues.add("");
}
String cellType = xmlReader.getAttributeValue(null, "t");
String cellStyleStr = xmlReader.getAttributeValue(null, "s");
rowValues.add(getCellValue(cellType, cellStyleStr));
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("row")) {
break;
}
}
return rowValues.toArray(new String[rowValues.size()]);
}
/**
* @param cellType the cell type
* @param cellStyleStr the cell style
* @return cell content the cell value
* @throws XMLStreamException the XMLStreamException
*/
private String getCellValue(final String cellType, final String cellStyleStr) throws XMLStreamException {
String value = ""; // by default
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("v")) {
if (cellType != null && cellType.equals("s")) {
int idx = Integer.parseInt(xmlReader.getElementText());
String s = stringsTable.getEntryAt(idx);
return new XSSFRichTextString(s).toString();
}
if (cellStyleStr != null) {
String cellValue = xmlReader.getElementText();
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = styles.getStyleAt(styleIndex);
short formatIndex = style.getDataFormat();
if (!isValidDouble(cellValue)) {
return cellValue;
}
double doubleVal = Double.valueOf(cellValue);
boolean isValidExcelDate = HSSFDateUtil.isInternalDateFormat(formatIndex);
Date date = null;
if (isValidExcelDate) {
date = HSSFDateUtil.getJavaDate(doubleVal);
String dateStr = dateAsString(date);
return dateStr;
}
if (!(doubleVal == Math.floor(doubleVal))) {
return Double.toString(doubleVal);
}
return cellValue;
}
else {
return xmlReader.getElementText();
}
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("c")) {
break;
}
}
return value;
}
/**
* To check whether the incoming value can be used in the Double utility method Double.valueOf() to prevent
* NumberFormatException.
* @param stringVal - String to be validated.
* @return - true if it is a valid String which can be passed into Double.valueOf method. <br/> For more information
* refer- <a>https://docs.oracle.com/javase/7/docs/api/java/lang/Double.html#valueOf(java.lang.String)</a>
*/
private boolean isValidDouble(String stringVal) {
final String Digits = "(\\p{Digit}+)";
final String HexDigits = "(\\p{XDigit}+)";
// an exponent is 'e' or 'E' followed by an optionally
// signed decimal integer.
final String Exp = "[eE][+-]?" + Digits;
final String fpRegex = ("[\\x00-\\x20]*" + // Optional leading "whitespace"
"[+-]?(" + // Optional sign character
"NaN|" + // "NaN" string
"Infinity|" + // "Infinity" string
// A decimal floating-point string representing a finite positive
// number without a leading sign has at most five basic pieces:
// Digits . Digits ExponentPart FloatTypeSuffix
//
// Since this method allows integer-only strings as input
// in addition to strings of floating-point literals, the
// two sub-patterns below are simplifications of the grammar
// productions from the Java Language Specification, 2nd
// edition, section 3.10.2.
// Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt
"(((" + Digits + "(\\.)?(" + Digits + "?)(" + Exp + ")?)|" +
// . Digits ExponentPart_opt FloatTypeSuffix_opt
"(\\.(" + Digits + ")(" + Exp + ")?)|" +
// Hexadecimal strings
"((" +
// 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "(\\.)?)|" +
// 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" +
")[pP][+-]?" + Digits + "))" + "[fFdD]?))" + "[\\x00-\\x20]*");// Optional trailing "whitespace"
if (Pattern.matches(fpRegex, stringVal))
return true;
else {
return false;
}
}
/**
* Date as string.
* @param date the date
* @return the string
*/
public static String dateAsString(final Date date) {
String dateAsString = null;
if (date != null) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
dateAsString = sdf.format(date);
}
return dateAsString;
}
@Override
protected final void finalize() throws Throwable {
if (opcPkg != null) {
opcPkg.close();
}
super.finalize();
}
}
以下是POI网站上关于阅读缺少行的工作表的说明:
for (int rowNum = rowStart; rowNum < rowEnd; rowNum++) {
Row r = sheet.getRow(rowNum);
if (r == null) {
// This whole row is empty
// Handle it as needed
continue;
}
int lastColumn = Math.max(r.getLastCellNum(), MY_MINIMUM_COLUMN_COUNT);
for (int cn = 0; cn < lastColumn; cn++) {
Cell c = r.getCell(cn, Row.RETURN_BLANK_AS_NULL);
if (c == null) {
// The spreadsheet is empty in this cell
} else {
// Do something useful with the cell's contents
}
}
}
你真的可以在那里得到很多答案。您不应该直接解析XML。即使是高级用户模型中缺少的东西,通常也可以通过XMLBeans生成的CT类找到它。以下是POI网站上关于阅读缺少行的工作表的说明:
for (int rowNum = rowStart; rowNum < rowEnd; rowNum++) {
Row r = sheet.getRow(rowNum);
if (r == null) {
// This whole row is empty
// Handle it as needed
continue;
}
int lastColumn = Math.max(r.getLastCellNum(), MY_MINIMUM_COLUMN_COUNT);
for (int cn = 0; cn < lastColumn; cn++) {
Cell c = r.getCell(cn, Row.RETURN_BLANK_AS_NULL);
if (c == null) {
// The spreadsheet is empty in this cell
} else {
// Do something useful with the cell's contents
}
}
}
你真的可以在那里得到很多答案。您不应该直接解析XML。即使是高级用户模型中缺少的东西,通常也可以通过XMLBeans生成的CT类找到。取决于您没有发布的代码,您可以在getRow方法调用周围发布一段代码片段?读取数据的代码与问题一起添加。您需要花费大量精力来重写POI API。您不应该仅仅为了阅读电子表格而使用javax.xml.stream包。实际上,您需要的一切都在相应的usermodel包中。查看链接到的文档Axel Richter。取决于您没有发布的代码。您可以发布关于getRow方法调用的代码片段吗?读取数据的代码与问题一起添加。您需要花费大量精力来重写POI API。您不应该仅仅为了阅读电子表格而使用javax.xml.stream包。实际上,您需要的一切都在相应的usermodel包中。请参阅Axel Richter链接到的文档。如果我们能够相信ApachePOI文档和Busy Developers Guide是最新的,那就更好了。目前,Row.RETURN_BLANK_AS_NULL对于版本3.17是错误的,并且docu声明为已弃用的许多方法在版本3.17中没有弃用。例如Cell.getCellTypeEnum。所以用户会感到困惑和沮丧。@AxelRichter是的,出版文档的祸根。我会记下这一点,但是如果你有时间并且感兴趣,也许你可以为这个页面创建一个补丁。上面的代码在2020年仍然有效吗?我是说有什么不推荐的方法吗?有更好的或替代的解决方案吗?如果我们相信ApachePOI文档和《忙碌的开发人员指南》是最新的,那就更好了。目前,Row.RETURN_BLANK_AS_NULL对于版本3.17是错误的,并且docu声明为已弃用的许多方法在版本3.17中没有弃用。例如Cell.getCellTypeEnum。所以用户会感到困惑和沮丧。@AxelRichter是的,出版文档的祸根。我会记下这一点,但是如果你有时间并且感兴趣,也许你可以为这个页面创建一个补丁。上面的代码在2020年仍然有效吗?我是说有什么不推荐的方法吗?有更好的或替代的解决方案吗?