Javascript Node.js CSV 到 JSON —— 内存不足(约 1,000,000 条记录)

Javascript Node.js CSV 到 JSON —— 内存不足(约 1,000,000 条记录)。标签:javascript、json、node.js、mongodb、csv。我有一个将 CSV 转换为 JSON 的脚本,需要解析约 1,000,000 条记录并构建 JSON 对象,然后将这些对象写入 MongoDB 数据库;但脚本在处理到约 100,000 条记录时就报内存不足错误,除非我传入 `--max-old-space-size` 标志分配 10000 MB 内存。下面是代码,有人能提出修改建议以提高此脚本的内存效率吗?

我有一个将 CSV 转换为 JSON 的脚本,需要解析约 1,000,000 条记录并为其构建 JSON 对象,然后将这些对象写入 MongoDB 数据库。但我的脚本在处理到约 100,000 条记录时就报内存不足错误,除非我传入 `--max-old-space-size` 标志分配 10000 MB 内存。

下面是代码,有人能提出修改建议以提高此脚本的内存效率吗?

/*
* @desc Builds JSON from Ofcom CSV source(s) for postcode broadband data
* @author Jacob Clark
*/

var fs          = require("fs");
var MongoClient = require('mongodb').MongoClient

/*
 * BroadbandData class constructor.
 * @param {Array} sources - file paths of the Ofcom CSV inputs to ingest
 */
function BroadbandData(sources){
    // Column names matching the CSV layout; used as keys when building JSON.
    this.header = [
        "Postcode(No Spaces)",
        "Postcode Data Status",
        "Lines < 2Mbps(Y/N)",
        "Average Speed/Mbps",
        "Median Speed/Mbps",
        "Maximum Speed/Mbps",
        "NGA Available(Y/N)",
        "Number of Connections"
    ];
    this.sources           = sources; // CSV paths to read
    this.broadbandDataJSON = [];      // in-memory store, only used by persist(..., "object")
    this.mongoDB           = null;    // set once connectToMongoDB succeeds
}


/*
 * Connect to MongoDB, store the db handle, then hand control to `callback`.
 * @param {Function} callback - invoked as callback(dataArrays, this) once connected
 * @param {Array} dataArrays - parsed CSV rows, forwarded untouched to the callback
 */
BroadbandData.prototype.connectToMongoDB = function(callback, dataArrays){
    var _this = this; // was an implicit global; `var` keeps it scoped to this call
    MongoClient.connect('mongodb://127.0.0.1:27017/UKBroadbandCoverageAndSpeed', function(err, db) {
        // Fail fast on a connection error instead of continuing with an undefined db handle.
        if(err) throw err;
        _this.mongoDB = db;
        callback(dataArrays, _this);
    });
};

/*
 * Read every source file and split it into CSV line strings.
 * NOTE(review): this still loads each whole file into memory at once, which is
 * the likely cause of the out-of-memory errors at ~1M records; a streaming,
 * line-by-line reader (e.g. readline over fs.createReadStream) would be the
 * real fix, but that changes this method's synchronous interface.
 * @returns {Array} one array of line strings per source file
 */
BroadbandData.prototype.getData = function(){
    var data = []; // was an implicit global
    // Index loop: for..in over an array iterates keys (and inherited props), not values.
    for(var i = 0; i < this.sources.length; i++){
        data.push(fs.readFileSync(this.sources[i]).toString().split("\n"));
    }
    return data;
};

/*
 * Build one JSON object per CSV row (keyed by the header columns) and persist
 * each to MongoDB as it is built.
 * @param {Array} dataArrays - per-file arrays of CSV line strings (from getData)
 * @param {Object} bd - the BroadbandData instance; passed explicitly because
 *                      this function is handed around as a bare callback and
 *                      would otherwise lose `this`
 * @returns null
 */
BroadbandData.prototype.buildJSON = function(dataArrays, bd){
    // Plain index loops: the originals used for..in over arrays and leaked
    // every loop/temp variable (`postcodeData`, `key`, `value`, ...) as
    // implicit globals.
    for(var f = 0; f < dataArrays.length; f++){
        var lines = dataArrays[f];
        for(var r = 0; r < lines.length; r++){
            var postcodeData = lines[r].split(",");
            var tempObj = {};
            for(var c = 0; c < postcodeData.length; c++){
                tempObj[bd.header[c]] = postcodeData[c];
            }
            bd.persist(tempObj, "mongodb");
        }
    }
};

/*
 * Persist one record to the chosen backend.
 * @param {Object} obj - the record to store
 * @param {String} source - "mongodb" to insert into the `data` collection,
 *                          "object" to append to the in-memory array
 * @returns null
 */
BroadbandData.prototype.persist = function(obj, source){
    if(source == 'object'){
        // Keep it in memory only; nothing touches the database here.
        this.broadbandDataJSON.push(obj);
    }else if(source == 'mongodb'){
        // One insert per record; errors surface via the callback.
        this.mongoDB.collection('data').insert(obj, function(err, docs) {
            if(err) throw(err);
        });
    }
};

// Script entry point: read both Ofcom CSV parts, connect to Mongo, then let
// buildJSON insert every parsed row.
var sources = [
    "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part1-fixed-broadband-postcode-level-data-2013.csv",
    "../data/ofcom-uk-fixed-broadband-postcode-level-data-2013/ofcom-part2-fixed-broadband-postcode-level-data-2013.csv"
];
var bd = new BroadbandData(sources);
bd.connectToMongoDB(bd.buildJSON, bd.getData());

评论:数据持久化后是否仍把它保存在内存/数组中?`this.broadbandDataJSON.push(obj);` ——瞎猜,是这一行占用了所有的空间。——回复:在这种情况下这一行永远不会执行;我是直接把数据写入 Mongo 的,该数组中没有存储任何内容。评论:是否把整个文件读入内存而不是分块读取?在 getData 中,`data = [];` 会把整个文件读入该数组。应当分块读取:例如每次读取 1000 条记录,插入 Mongo 集合后再读取下一批 1000 条……