Java-XLSX解析&数据库导出
我有一个excel,它填充了大约50k-60k行 我必须将excel内容上传到MySQL,通常我使用ApachePOI读取并上传到MySQL,但是这个文件不能使用ApachePOI读取,因为文件太大了 有人能指导我怎么做吗?下面是我使用ApachePOI将内容上传到MySQL的示例代码(它适用于一些包含1000-2000行的小xlsx文件)Java-XLSX解析&;数据库导出,java,mysql,export,xlsx,Java,Mysql,Export,Xlsx,我有一个excel,它填充了大约50k-60k行 我必须将excel内容上传到MySQL,通常我使用ApachePOI读取并上传到MySQL,但是这个文件不能使用ApachePOI读取,因为文件太大了 有人能指导我怎么做吗?下面是我使用ApachePOI将内容上传到MySQL的示例代码(它适用于一些包含1000-2000行的小xlsx文件) publicstaticvoid上传crosssellcorpcard(FileItem文件,stringdbtable){ System.out.prin
publicstaticvoid上传crosssellcorpcard(FileItem文件,stringdbtable){
System.out.println(“UploadUtil Running”+file.getFileName().toString());
试一试{
for (int i = 0; i < …(原文代码在此处被截断)
您可以尝试使用 Apache POI 的 SAX 方式——请阅读 Apache POI 官方文档中的“XSSF and SAX (Event API)”一节。
您可以像读取 XML 文件一样读取包含 60k 行甚至 100k 行的整个 Excel 文件。唯一需要注意的是空单元格:空单元格在 XML 中没有对应的标记,解析时会被直接跳过,而您可能希望在数据库表中为这些单元格写入 null 值。
解决方案:逐行读取,并在循环中执行 insert 语句。通过跟踪单元格地址(例如 A1、B1、D1)来发现被跳过的空单元格;一旦地址出现间隔,就找出对应的列名,并在 insert 语句中为该列填入 null 值。
我希望这对您有所帮助。下面的示例代码读取excel并将其存储在ArrayList的ArrayList中,以表格式表示。我正在控制台中打印消息-“新行开始”,然后再开始读取和打印行。以及在打印单元格值本身之前每个值的单元格编号
我没有考虑空单元格的单元格间距,但您可以根据查找单元格间距来编码,因为在我的情况下,我没有空单元格。
在控制台中查找单元格地址,以帮助您发现任何间隙并按您的意愿进行处理
运行这段代码,对我来说效果很好。别忘了添加 xmlbeans-2.3.0.jar,
以及 import 语句所需的其他 JAR。
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Reads one worksheet of an .xlsx workbook with Apache POI's SAX event API
 * and collects the cell values row by row.
 *
 * <p>Each parsed row becomes an {@code ArrayList<StringBuilder>} whose first
 * element is the running row counter ({@link #lineCounter}) followed by the
 * text of every cell that has a {@code <v>} element. Cells that are absent
 * from the sheet XML (empty cells) produce no entry, so column positions are
 * NOT preserved; the cell reference printed to the console can be used to
 * detect such gaps.
 *
 * <p>NOTE: the result list and the row counter are public static fields, so
 * they are shared by all instances and keep accumulating across calls.
 */
public class ExcelToStringArray implements Cloneable {

    /** All rows collected so far (shared by every instance, never reset here). */
    public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();

    /** The row currently being filled by the SAX handler. */
    public static ArrayList<StringBuilder> retainedString;

    /** Number of {@code <row>} elements seen so far; stored as the first element of each row. */
    public static Integer lineCounter = 0;

    /**
     * Parses one sheet and returns the accumulated rows.
     *
     * @param PathtoFilename path of the .xlsx file
     * @param rId            identifier handed to {@code XSSFReader.getSheet}
     *                       (e.g. "rId1"; presumably the sheet's relationship id)
     * @return the shared static list {@link #stringArrayToReturn}
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
            throws Exception {
        // All results live in static fields, so a second parser instance is unnecessary.
        processOneSheet(PathtoFilename, rId);
        return stringArrayToReturn;
    }

    /**
     * Opens the workbook, streams the requested sheet through the SAX handler
     * and releases the package and the sheet stream afterwards, even on failure.
     *
     * @param PathtoFilename path of the .xlsx file
     * @param rId            identifier handed to {@code XSSFReader.getSheet}
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void processOneSheet(String PathtoFilename, String rId) throws Exception {
        OPCPackage pkg = OPCPackage.open(PathtoFilename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            InputStream sheet = r.getSheet(rId);
            try {
                parser.parse(new InputSource(sheet));
            } finally {
                sheet.close();
            }
        } finally {
            // revert() closes the package WITHOUT saving; close() on a package
            // opened from a path could write the file back to disk.
            pkg.revert();
        }
    }

    /**
     * Creates the SAX reader and wires it to a {@link SheetHandler}.
     *
     * @param sst shared strings table used to resolve string cells
     * @return an XML reader ready to parse a sheet stream
     * @throws SAXException if the Xerces parser class cannot be instantiated
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /** SAX handler that turns {@code row}/{@code c}/{@code v} elements into rows of strings. */
    private class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        /** Character data gathered since the most recent start tag. */
        private String lastContents;
        /** True while the current cell's {@code <v>} holds a shared-strings index. */
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            if (name.equals("row")) {
                // Start a fresh row and register it in the result right away.
                retainedString = new ArrayList<StringBuilder>();
                stringArrayToReturn.add(retainedString);
                System.out.println("New row begins");
                retainedString.add(new StringBuilder(lineCounter.toString()));
                lineCounter++;
            }
            // c => cell
            if (name.equals("c")) {
                // Print the cell reference (e.g. "B7") so gaps left by empty cells are visible.
                System.out.print(attributes.getValue("r") + " - ");
                // t="s" means the value is an index into the shared strings table.
                nextIsString = "s".equals(attributes.getValue("t"));
            }
            // Clear contents cache
            lastContents = "";
        }

        public void endElement(String uri, String localName, String name) throws SAXException {
            // v => contents of a cell. Processed here rather than in characters(),
            // because characters() may be called more than once per element.
            if (name.equals("v")) {
                // Resolve the shared-string index only for <v>; doing it on any other
                // closing tag (an empty <c t="s"/> or a preceding <f>) would try to
                // parse non-numeric text and throw NumberFormatException.
                if (nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                System.out.println(lastContents);
                // Store the cell value, whether it was a string or a number.
                retainedString.add(new StringBuilder(lastContents));
            }
        }

        public void characters(char[] ch, int start, int length) throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }

    /** Demo entry point: parses one sheet of a hard-coded workbook and prints timing and rows. */
    public static void main(String[] args) throws Exception {
        StopWatch watch = new StopWatch();
        watch.start();
        ExcelToStringArray generate = new ExcelToStringArray();
        // rId1 is the first sheet in my workbook, rId2 the second sheet and so on.
        generate.GetSheetInStringArray("D:\\Users\\NIA\\Desktop\\0000_MasterTestSuite.xlsx", "rId10");
        watch.stop();
        System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
        System.out.println("done");
        System.out.println(ExcelToStringArray.stringArrayToReturn);
    }
}
import java.io.InputStream;
导入java.util.ArrayList;
导入org.apache.commons.lang3.time.DurationFormatUtils;
导入org.apache.commons.lang3.time.StopWatch;
导入org.apache.poi.xssf.eventusermodel.XSSFReader;
导入org.apache.poi.xssf.model.SharedStringsTable;
导入org.apache.poi.xssf.usermodel.XSSFRichTextString;
导入org.apache.poi.openxml4j.opc.OPCPackage;
导入org.xml.sax.Attributes;
导入org.xml.sax.ContentHandler;
导入org.xml.sax.InputSource;
导入org.xml.sax.SAXException;
导入org.xml.sax.XMLReader;
导入org.xml.sax.helpers.DefaultHandler;
导入org.xml.sax.helpers.XMLReaderFactory;
公共类ExcelToStringaray实现可克隆{
公共静态ArrayList stringArrayToReturn=新ArrayList();
公共静态数组列表保留;
公共静态整数lineCounter=0;
公共ArrayList GetSheetInStringArray(字符串路径文件名,字符串rId)
抛出异常{
ExcelToStringaray myParser=新建ExcelToStringaray();
processOneSheet(路径文件名,rId);
返回字符串返回;
}
public void processOneSheet(字符串PathtoFilename,字符串rId)引发异常{
OPCPackage pkg=OPCPackage.open(路径文件名);
XSSFReader r=新XSSFReader(pkg);
SharedStringsTable sst=r.getSharedStringsTable();
XMLReader parser=fetchSheetParser(sst);
InputStream sheet=r.getSheet(rId);
InputSource sheetSource=新的InputSource(表);
parser.parse(sheetSource);
sheet.close();
}
公共XMLReader fetchSheetParser(SharedStringsTable sst)引发SAXException{
XMLReader parser=XMLReaderFactory.createXMLReader(“org.apache.xerces.parsers.SAXParser”);
ContentHandler=新的SheetHandler(sst);
setContentHandler(handler);
返回解析器;
}
私有类SheetHandler扩展了DefaultHandler{
私人股本;
私有字符串内容;
私有布尔连接字符串;
专用SheetHandler(SharedStringsTable sst){
这个.sst=sst;
}
public void startElement(字符串uri、字符串localName、字符串名称、属性)引发SAXException{
if(name.equals(“行”)){
retainedString=newarraylist();
if(retainedString.isEmpty()){
stringArrayToReturn.add(保留字符串);
restainedstring.clear();
}
System.out.println(“新行开始”);
添加(新的StringBuilder(lineCounter.toString());
lineCounter++;
}
//c=>单元
如果(名称等于(“c”)){
//打印单元格引用
System.out.print(attributes.getValue(“r”)+“-”;
//System.out.print(attributes.getValue(“r”)+“-”;
//确定该值是否为SST中的索引
字符串cellType=attributes.getValue(“t”);
if(cellType!=null&&cellType.equals(“s”)){
nextIsString=true;
}否则{
nextiststring=false;
}
}
//清除内容缓存
lastContents=“”;
}
公共void endElement(字符串uri、字符串localName、字符串名称)引发SAXException{
//根据需要处理最后的内容。
//现在就做,因为characters()可能会被多次调用
if(nextIsString){
int idx=Integer.parseInt(lastContents);
lastContents=newxssfrichtextstring(sst.getEntryAt(idx)).toString();
nextiststring=false;
}
//v=>单元格的内容
//看到字符串内容后输出
如果(名称等于(“v”)){
System.out.println(lastContents);
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Reads one worksheet of an .xlsx workbook with Apache POI's SAX event API
 * and collects the cell values row by row.
 *
 * <p>Each parsed row becomes an {@code ArrayList<StringBuilder>} whose first
 * element is the running row counter ({@link #lineCounter}) followed by the
 * text of every cell that has a {@code <v>} element. Cells that are absent
 * from the sheet XML (empty cells) produce no entry, so column positions are
 * NOT preserved; the cell reference printed to the console can be used to
 * detect such gaps.
 *
 * <p>NOTE: the result list and the row counter are public static fields, so
 * they are shared by all instances and keep accumulating across calls.
 */
public class ExcelToStringArray implements Cloneable {

    /** All rows collected so far (shared by every instance, never reset here). */
    public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();

    /** The row currently being filled by the SAX handler. */
    public static ArrayList<StringBuilder> retainedString;

    /** Number of {@code <row>} elements seen so far; stored as the first element of each row. */
    public static Integer lineCounter = 0;

    /**
     * Parses one sheet and returns the accumulated rows.
     *
     * @param PathtoFilename path of the .xlsx file
     * @param rId            identifier handed to {@code XSSFReader.getSheet}
     *                       (e.g. "rId1"; presumably the sheet's relationship id)
     * @return the shared static list {@link #stringArrayToReturn}
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
            throws Exception {
        // All results live in static fields, so a second parser instance is unnecessary.
        processOneSheet(PathtoFilename, rId);
        return stringArrayToReturn;
    }

    /**
     * Opens the workbook, streams the requested sheet through the SAX handler
     * and releases the package and the sheet stream afterwards, even on failure.
     *
     * @param PathtoFilename path of the .xlsx file
     * @param rId            identifier handed to {@code XSSFReader.getSheet}
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void processOneSheet(String PathtoFilename, String rId) throws Exception {
        OPCPackage pkg = OPCPackage.open(PathtoFilename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            InputStream sheet = r.getSheet(rId);
            try {
                parser.parse(new InputSource(sheet));
            } finally {
                sheet.close();
            }
        } finally {
            // revert() closes the package WITHOUT saving; close() on a package
            // opened from a path could write the file back to disk.
            pkg.revert();
        }
    }

    /**
     * Creates the SAX reader and wires it to a {@link SheetHandler}.
     *
     * @param sst shared strings table used to resolve string cells
     * @return an XML reader ready to parse a sheet stream
     * @throws SAXException if the Xerces parser class cannot be instantiated
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /** SAX handler that turns {@code row}/{@code c}/{@code v} elements into rows of strings. */
    private class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        /** Character data gathered since the most recent start tag. */
        private String lastContents;
        /** True while the current cell's {@code <v>} holds a shared-strings index. */
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            if (name.equals("row")) {
                // Start a fresh row and register it in the result right away.
                retainedString = new ArrayList<StringBuilder>();
                stringArrayToReturn.add(retainedString);
                System.out.println("New row begins");
                retainedString.add(new StringBuilder(lineCounter.toString()));
                lineCounter++;
            }
            // c => cell
            if (name.equals("c")) {
                // Print the cell reference (e.g. "B7") so gaps left by empty cells are visible.
                System.out.print(attributes.getValue("r") + " - ");
                // t="s" means the value is an index into the shared strings table.
                nextIsString = "s".equals(attributes.getValue("t"));
            }
            // Clear contents cache
            lastContents = "";
        }

        public void endElement(String uri, String localName, String name) throws SAXException {
            // v => contents of a cell. Processed here rather than in characters(),
            // because characters() may be called more than once per element.
            if (name.equals("v")) {
                // Resolve the shared-string index only for <v>; doing it on any other
                // closing tag (an empty <c t="s"/> or a preceding <f>) would try to
                // parse non-numeric text and throw NumberFormatException.
                if (nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                System.out.println(lastContents);
                // Store the cell value, whether it was a string or a number.
                retainedString.add(new StringBuilder(lastContents));
            }
        }

        public void characters(char[] ch, int start, int length) throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }

    /** Demo entry point: parses one sheet of a hard-coded workbook and prints timing and rows. */
    public static void main(String[] args) throws Exception {
        StopWatch watch = new StopWatch();
        watch.start();
        ExcelToStringArray generate = new ExcelToStringArray();
        // rId1 is the first sheet in my workbook, rId2 the second sheet and so on.
        generate.GetSheetInStringArray("D:\\Users\\NIA\\Desktop\\0000_MasterTestSuite.xlsx", "rId10");
        watch.stop();
        System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
        System.out.println("done");
        System.out.println(ExcelToStringArray.stringArrayToReturn);
    }
}