在Java中创建大型csv文件变得非常缓慢
我在尝试基于另一个 csv 文件创建新的 csv 文件时遇到了性能问题(标签:java, performance, csv)。以下是原始文件的内容示例:
country,state,co,olt,olu,splitter,ont,cpe,cpe.latitude,cpe.longitude,cpe.customer_class,cpe.phone,cpe.ip,cpe.subscriber_id
COUNTRY-0001,STATE-0001,CO-0001,OLT-0001,OLU0001,SPLITTER-0001,ONT-0001,CPE-0001,28.21487,77.451775,ALL,SIP:+674100002743@IMS.COMCAST.NET,SIP:E28EDADA06B2@IMS.COMCAST.NET,CPE_SUBSCRIBER_ID-QHLHW4
COUNTRY-0001,STATE-0002,CO-0002,OLT-0002,OLU0002,SPLITTER-0002,ONT-0002,CPE-0002,28.294018,77.068924,ALL,SIP:+796107443092@IMS.COMCAST.NET,SIP:58DD999D6466@IMS.COMCAST.NET,CPE_SUBSCRIBER_ID-AH8NJQ
这样的行可能有数百万条,我是在处理 1,280,000 行时发现这个问题的。
这是算法:
// Read the source CSV through a 409600-character buffer and feed every data
// row (everything after the header) into the intermediate buffer.
File csvInputFile = new File(csv_path);
int blockSize = 409600;
brCsvInputFile = new BufferedReader(frCsvInputFile, blockSize);
StringBuilder sbIntermediate = new StringBuilder();
// The first line is the header: it is consumed here and used to build the column indexes.
skipFirstLine(brCsvInputFile);
String line = brCsvInputFile.readLine();
while (line != null) {
    String[] fields = line.split(REGEX_COMMA);
    createIntermediateStringBuffer(sbIntermediate, fields);
    // readLine() yields null once the end of the stream is reached, which ends the loop.
    line = brCsvInputFile.readLine();
}
/**
 * Consumes the first (header) line of the CSV and builds both column indexes from it.
 *
 * <p>Despite its name, this method does not merely skip the line: the header is split on
 * {@code REGEX_COMMA} and handed to {@code createIndex} and {@code createIntermediateIndex}.
 *
 * @param br reader positioned at the beginning of the CSV input
 * @throws IOException if reading fails, or if the input is empty and therefore has no header line
 */
private static void skipFirstLine(BufferedReader br) throws IOException {
    String line = br.readLine();
    if (line == null) {
        // readLine() returns null at end of stream; without this guard an empty
        // file would surface as an unexplained NullPointerException on split().
        throw new IOException("CSV input is empty: header line is missing");
    }
    String[] splitLine = line.split(REGEX_COMMA);
    LOGGER.debug("First line detected! ");
    createIndex(splitLine);
    createIntermediateIndex(splitLine);
}
/**
 * Registers every header column name in {@code headerIndex}, mapped to its
 * zero-based position within the header line, then dumps the map via {@code printMap}.
 *
 * @param splitLine the header line already split into column names
 */
private static void createIndex(String[] splitLine) {
    LOGGER.debug("START method createIndex.");
    int position = 0;
    for (String columnName : splitLine) {
        headerIndex.put(columnName, position);
        position++;
    }
    printMap(headerIndex);
    LOGGER.debug("COMPLETED method createIndex.");
}
/**
 * Rebuilds {@code headerIntermediateIndex}, the label-to-position layout of the intermediate CSV.
 *
 * <p>Positions are assigned consecutively, starting at 0, in this order:
 * <ol>
 *   <li>for each service-path level: the level label, then {@code level + "ID"} in the next slot;</li>
 *   <li>the latitude, longitude and customer-class labels taken from {@code newTopology};</li>
 *   <li>one slot per metadata element, keyed by its lower-cased label (metadata is optional).</li>
 * </ol>
 *
 * @param splitLine the header line split into column names; not read by this method
 */
private static void createIntermediateIndex(String[] splitLine) {
    LOGGER.debug("START method createIntermediateIndex.");
    com.tekcomms.c2d.xml.model.v2.Metadata_element[] metadata_element = null;
    String[] servicePath = newTopology.getElement().getEntity().getService_path().getLevel();
    if (newTopology.getElement().getMetadata() != null) {
        metadata_element = newTopology.getElement().getMetadata().getMetadata_element();
    }
    // Arrays.toString prints the contents (array.toString() only prints an identity hash)
    // and tolerates null, so a topology without metadata no longer throws here.
    LOGGER.debug(java.util.Arrays.toString(servicePath));
    LOGGER.debug(java.util.Arrays.toString(metadata_element));

    headerIntermediateIndex.clear();
    int indexIntermediateId = 0;
    for (String level : servicePath) {
        LOGGER.debug("level is: " + level);
        headerIntermediateIndex.put(level, indexIntermediateId++);
        // The level's identifier column sits immediately after the level itself.
        headerIntermediateIndex.put(level + "ID", indexIntermediateId++);
    }

    // cpe.latitude, cpe.longitude and cpe.customer_class are handled explicitly;
    // it would be better if they were metadata as well.
    String labelLatitude = newTopology.getElement().getEntity().getLatitude();
    headerIntermediateIndex.put(labelLatitude, indexIntermediateId++);
    String labelLongitude = newTopology.getElement().getEntity().getLongitude();
    headerIntermediateIndex.put(labelLongitude, indexIntermediateId++);
    String labelCustomerClass = newTopology.getElement().getCustomer_class();
    headerIntermediateIndex.put(labelCustomerClass, indexIntermediateId++);

    // Metadata columns (e.g. cpe.phone, cpe.ip, cpe.subscriber_id, ...) are optional:
    // one slot per element of topology.element.metadata, if any are declared.
    if (metadata_element != null) {
        for (int j = 0; j < metadata_element.length; j++) {
            // NOTE(review): lower-casing uses the default locale, as in the original.
            String label = metadata_element[j].getLabel().toLowerCase();
            LOGGER.debug(" ==label: " + label + " index_pos: " + j);
            headerIntermediateIndex.put(label, indexIntermediateId++);
        }
    }
    printMap(headerIntermediateIndex);
    LOGGER.debug("COMPLETED method createIntermediateIndex.");
}
File csvInputFile=新文件(csv\u路径);
int blockSize=409600;
brCsvInputFile=新的BufferedReader(frCsvInputFile,blockSize);
字符串行=null;
StringBuilder sbIntermediate=新建StringBuilder();
skipFirstLine(brCsvInputFile);
而((line=brCsvInputFile.readLine())!=null){
createIntermediateStringBuffer(sbIntermediate,line.split(REGEX_逗号));
}
私有静态void skipFirstLine(BufferedReader br)引发IOException{
String line=br.readLine();
String[]splitLine=line.split(REGEX_逗号);
debug(“检测到第一行!”);
创建索引(分割线);
创建中间索引(分割线);
}
私有静态void createIndex(字符串[]拆分行){
debug(“启动方法createIndex”);
对于(int i=0;i
读取整个数据集(1,280,000 行)只需要 800 毫秒!所以问题出在下面这个方法上:
/**
 * Appends one intermediate-CSV row, built from one source row, to {@code sbIntermediate}.
 *
 * <p>The row consists of, in order:
 * <ol>
 *   <li>for each service-path level whose column exists in {@code splitLine}: the value,
 *       then {@code HashUtils.calculateHash} of all level values collected so far;</li>
 *   <li>the latitude, longitude and customer-class values, each only if its column exists;</li>
 *   <li>one field per metadata element, left empty when its column is missing from the row;</li>
 *   <li>a terminating {@code "\n"}.</li>
 * </ol>
 * Every field is followed by {@code REGEX_COMMA}.
 *
 * <p>Performance: debug output is built only when debug logging is enabled and shows only
 * the row under construction. Calling {@code sbIntermediate.toString()} for every field
 * copied the whole accumulated buffer each time, making the run quadratic in input size.
 *
 * @param sbIntermediate accumulator that receives the new row
 * @param splitLine      the source row already split into fields
 */
private static void createIntermediateStringBuffer(StringBuilder sbIntermediate, String[] splitLine) throws ClassCastException,
        NullPointerException {
    LOGGER.debug("START method createIntermediateStringBuffer.");
    final boolean debugEnabled = LOGGER.isDebugEnabled();
    // Offset at which this row starts, so debug output can show just this row.
    final int rowStart = sbIntermediate.length();
    long start, end;
    start = System.currentTimeMillis();
    ArrayList<String> hashes = new ArrayList<String>();
    com.tekcomms.c2d.xml.model.v2.Metadata_element[] metadata_element = null;
    String[] servicePath = newTopology.getElement().getEntity().getService_path().getLevel();
    if (debugEnabled) {
        LOGGER.debug(java.util.Arrays.toString(servicePath));
    }
    if (newTopology.getElement().getMetadata() != null) {
        metadata_element = newTopology.getElement().getMetadata().getMetadata_element();
        if (debugEnabled) {
            // Arrays.toString is null-safe; the element array itself may be absent.
            LOGGER.debug(java.util.Arrays.toString(metadata_element));
        }
    }

    for (String level : servicePath) {
        if (debugEnabled) {
            LOGGER.debug("level is: " + level);
        }
        int position = getPositionFromIndex(level);
        if (splitLine.length > position) {
            String name = splitLine[position];
            sbIntermediate.append(name);
            // The hash covers this level's name plus all names collected before it in this row.
            hashes.add(name);
            sbIntermediate.append(REGEX_COMMA).append(HashUtils.calculateHash(hashes)).append(REGEX_COMMA);
            if (debugEnabled) {
                LOGGER.debug(" ==sbIntermediate: " + sbIntermediate.substring(rowStart));
            }
        }
    }

    // cpe.latitude, cpe.longitude and cpe.customer_class are handled explicitly;
    // it would be better if they were metadata as well.
    appendColumnIfPresent(sbIntermediate, splitLine, newTopology.getElement().getEntity().getLatitude());
    appendColumnIfPresent(sbIntermediate, splitLine, newTopology.getElement().getEntity().getLongitude());
    appendColumnIfPresent(sbIntermediate, splitLine, newTopology.getElement().getCustomer_class());

    // Metadata is optional: a missing column still yields an (empty) field so that
    // every row keeps the same number of metadata fields.
    if (metadata_element != null) {
        for (int j = 0; j < metadata_element.length; j++) {
            String label = metadata_element[j].getLabel();
            if (debugEnabled) {
                LOGGER.debug(" ==label: " + label + " index_pos: " + j);
            }
            int position = getPositionFromIndex(label);
            if (splitLine.length > position) {
                sbIntermediate.append(splitLine[position]);
            }
            sbIntermediate.append(REGEX_COMMA);
            if (debugEnabled) {
                LOGGER.debug(" ==sbIntermediate: " + sbIntermediate.substring(rowStart));
            }
        }
    }
    sbIntermediate.append("\n");
    end = System.currentTimeMillis();
    // NOTE(review): this INFO line is emitted once per input row; consider lowering it to debug.
    LOGGER.info("COMPLETED method createIntermediateStringBuffer. " + (end - start) + " ms. ");
}

/** Appends the value of column {@code label} plus a separator, if the row is long enough to contain it. */
private static void appendColumnIfPresent(StringBuilder sbIntermediate, String[] splitLine, String label) {
    int position = getPositionFromIndex(label);
    if (splitLine.length > position) {
        sbIntermediate.append(splitLine[position]).append(REGEX_COMMA);
    }
}
private static void createIntermediateStringBuffer(StringBuilder sbIntermediate,String[]splitLine)抛出ClassCastException,
NullPointerException{
debug(“START方法createIntermediateStringBuffer”);
漫长的开始,漫长的结束;
start=System.currentTimeMillis();
ArrayList哈希=新的ArrayList();
com.tekcomms.c2d.xml.model.v2.Metadata\u element[]Metadata\u element=null;
字符串[]servicePath=newTopology.getElement().getEntity().getService_path().getLevel();
debug(servicePath.toString());
if(newTopology.getElement().getMetadata()!=null){
metadata_element=newTopology.getElement().getMetadata().getMetadata_element();
debug(metadata_element.toString());
}
for(int i=0;igetPositionFromIndex(级别)){
字符串名称=拆分行[getPositionFromIndex(级别)];
sbIntermediate.append(名称);
添加(名称);
sbIntermediate.append(REGEX_逗号).append(HashUtils.calculateHash(hashes)).append(REGEX_逗号);
debug(“==sbIntermediate:+sbIntermediate.toString());
}
}
//end=System.currentTimeMillis();
//info(“已完成添加名称哈希。”+(end-start)+“ms.”+(end-start)/1000+“seg.”);
//添加cpe.latitude、cpe.longitude、cpe.customer_类,应该是
//最好是元数据。
字符串Labellative=newTopology.getElement().getEntity().getLatitude();
if(splitLine.length>getPositionFromIndex(Labellative)){
字符串lat=分割线[getPositionFromIndex(Labellative)];
sbIntermediate.append(lat.append)(REGEX_逗号);
}
字符串labellongitute=newTopology.getElement().getEntity().getLongitude();
if(splitLine.length>getPositionFromIndex(标签长度)){
字符串lon=分割线[getPositionFromIndex(标签长度)];
sbIntermediate.append(lon).append(REGEX_
// For every service-path level whose column exists in this row, append the
// value followed by the hash of all level values collected so far.
for (String level : servicePath) {
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("level is: " + level);
    }
    int position = getPositionFromIndex(level);
    if (splitLine.length > position) {
        String name = splitLine[position];
        sbIntermediate.append(name);
        hashes.add(name);
        sbIntermediate.append(REGEX_COMMA).append(HashUtils.calculateHash(hashes)).append(REGEX_COMMA);
        // Guarded: toString() copies the whole accumulated buffer, which is
        // far too expensive to do for every field when debug logging is off.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(" ==sbIntermediate: " + sbIntermediate.toString());
        }
    }
}
line.split(REGEX_COMMA)