Javascript Node.js CSV到JSON——内存不足(约1,000,000条记录)
我有一个将CSV转换为JSON的脚本,大约有1,000,000条记录需要解析并构建成JSON对象,然后将这些对象写入MongoDB数据库。但是我的脚本在处理到约100,000条记录时就返回内存不足错误,除非我通过 --max-old-space-size 标志分配10000MB的RAM。下面是代码,有人能提出修改建议以提高此脚本的内存效率吗?(标签:javascript, json, node.js, mongodb, csv)
/*
* @desc Builds JSON from Ofcom CSV source(s) for postcode broadband data
* @author Jacob Clark
*/
var fs = require("fs");
var MongoClient = require('mongodb').MongoClient
/*
 * BroadbandData class constructor.
 * @params Array (Object) sources - list of CSV file paths to convert
 */
function BroadbandData(sources){
    this.sources = sources;
    // Column names matching the Ofcom CSV layout, in file order.
    this.header = [
        "Postcode(No Spaces)",
        "Postcode Data Status",
        "Lines < 2Mbps(Y/N)",
        "Average Speed/Mbps",
        "Median Speed/Mbps",
        "Maximum Speed/Mbps",
        "NGA Available(Y/N)",
        "Number of Connections"
    ];
    this.broadbandDataJSON = [];  // in-memory store used by the "object" persistence mode
    this.mongoDB = null;          // populated once connectToMongoDB succeeds
}
/*
 * Connect to the local MongoDB instance and hand control to `callback`.
 * @params Function callback - invoked as callback(dataArrays, this) once connected
 * @params Object dataArrays - pre-parsed CSV data passed through to the callback
 */
BroadbandData.prototype.connectToMongoDB = function(callback, dataArrays){
    // BUG FIX: the original omitted `var`, leaking `_this` as an implicit
    // global — this breaks in strict mode and is shared across instances.
    var _this = this;
    MongoClient.connect('mongodb://127.0.0.1:27017/UKBroadbandCoverageAndSpeed', function(err, db) {
        // BUG FIX: the original ignored `err` and proceeded with an
        // undefined `db` handle; fail fast instead.
        if (err) throw err;
        _this.mongoDB = db;
        callback(dataArrays, _this);
    });
};
/*
 * Read every configured CSV source into memory as an array of lines.
 * @returns Array (Object) - one array of line strings per source file
 *
 * NOTE(review): readFileSync still loads each whole file into memory;
 * for ~1M records a streaming reader would be preferable, but the
 * synchronous array-of-lines contract is preserved here for callers.
 */
BroadbandData.prototype.getData = function(){
    // BUG FIX: the original leaked `data` and `source` as implicit
    // globals (no `var`), and used for...in over an array. Use a local
    // array and a plain index loop instead.
    var data = [];
    for (var i = 0; i < this.sources.length; i++) {
        data.push(fs.readFileSync(this.sources[i]).toString().split("\n"));
    }
    return data;
};
/*
 * Build one JSON object per CSV row and persist each to MongoDB.
 * @params Array (Object) dataArrays - per-source arrays of raw CSV line strings
 * @params Object bd - the BroadbandData instance (passed explicitly because
 *                     this method is handed around as a bare callback)
 * @returns null
 */
BroadbandData.prototype.buildJSON = function(dataArrays, bd){
    // BUG FIX: the original used for...in over arrays (string keys,
    // enumerates any added properties) and leaked `postcodeData`, `key`
    // and `value` as implicit globals; plain index loops and locals
    // preserve the behavior without polluting the global scope.
    for (var a = 0; a < dataArrays.length; a++) {
        var lines = dataArrays[a];
        for (var d = 0; d < lines.length; d++) {
            var postcodeData = lines[d].split(",");
            var tempObj = {};
            // Map each column value onto its named header field.
            for (var p = 0; p < postcodeData.length; p++) {
                tempObj[bd.header[p]] = postcodeData[p];
            }
            bd.persist(tempObj, "mongodb");
        }
    }
};
/*
 * Persist a single record to the chosen backing store.
 * @params Object obj - the record to store
 * @params String source - "mongodb" to insert into Mongo, "object" to
 *                         append to the in-memory array
 * @returns null
 */
BroadbandData.prototype.persist = function(obj, source){
    switch (source) {
        case 'mongodb':
            this.mongoDB.collection('data').insert(obj, function(err, docs) {
                if (err) throw(err);
            });
            break;
        case 'object':
            this.broadbandDataJSON.push(obj);
            break;
    }
};
// Entry point: wire the two Ofcom CSV parts into one converter, then
// connect to Mongo and kick off the CSV -> JSON build.
var ofcomSources = [
    "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part1-fixed-broadband-postcode-level-data-2013.csv",
    "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part2-fixed-broadband-postcode-level-data-2013.csv"
];
var bd = new BroadbandData(ofcomSources);
bd.connectToMongoDB(bd.buildJSON, bd.getData());
评论:数据持久化之后,你是否还把它保存在内存中的数组里?`this.broadbandDataJSON.push(obj);`——瞎猜一下,可能是这一行占用了所有的内存。——提问者回复:在这种情况下,这一行永远不会执行,我是直接写入Mongo的,因此该数组中没有存储任何内容。评论:那你是否把整个文件一次性读入了内存,而不是分块读取?——在 getData 中,`data = [];` 会把整个文件读入该数组。应当分块读取:例如每次读取1000条记录,把它们写入Mongo集合之后,再读取下一批1000条……