JavaScript out of memory: increase Node memory or optimize the code
My Node.js application is running out of memory. I am trying to insert 408,000 documents into MongoDB in a single call. I have two loops: the first goes from 1 to 24, and the second (inside the first) from 1 to 17,000. The data come from a NetCDF file: I parse the data in that file, build my model objects, and insert them into MongoDB.

I have seen some posts about this on StackOverflow, and I saw that I can increase Node's memory with --max_old_space_size. But I don't know whether that is the right approach; maybe you have some suggestions to optimize my code.

Here are my loops:
for (var time_pos = 0; time_pos < 24; time_pos++) {
    // This array contains 17 000 data
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    // Loop : 0 to 17 000
    for (var i = 0; i < dataSliced.length; i++) {
        var pollution = new Pollution();

        pollution.latitude  = current_lat;
        pollution.longitude = current_lng;
        pollution.country   = country_name;
        pollution.model     = model_name;
        pollution.data_type = type_name;
        pollution.level     = 0;
        pollution.datetime  = date;
        pollution.pollutants.pm10.description   = description;
        pollution.pollutants.pm10.units         = units;
        pollution.pollutants.pm10.concentration = dataSliced[i];

        // One save() call per document: 408 000 individual writes in total
        pollution.save(function(err) {
            if (err) throw err;
            console.log("Data saved");
        });
    }
}
If you have a better solution, feel free to post your own answer. I hope this helps you and others. :-)

I have done quite a lot of research into the best way to import data into MongoDB. I have used mongoimport and I have also used Mongoose (on top of the native MongoDB driver). I have read that it is best to keep the batch size around 100 for the best performance. Below is my solution using insertMany. Using mongoimport is very simple (just one line), so I see no need to post it here.

In my example, 602,198 records were first parsed into an array of objects and then imported into MongoDB successfully.

Importing the parsed objects into MongoDB takes some memory, so you will usually need the command shown in the console output below to allow Node to use more memory; you can read more about that option elsewhere.

To be efficient, I split the array of objects into batches and use Promise.all, which resolves once all of the promises in its iterable argument have resolved.

If the file is even larger and you run out of memory despite increasing Node's memory allowance, you can split the file. Remove the header line first and add it back through the CSV parser's options instead.

To split the file:
$ split -l numberoflines filename
ex. split -l 1000000 term2.csv
Say term2.csv has 5,000,001 lines and no header.
From the example above you will get 6 files: 5 files with one million lines each and one file with a single line.
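If you do split the file, the parts can then be imported one after another, so only one part is held in memory at a time. This is only a sketch reusing promiseCSV and bulkImportToMongo from the files below; the part file names (xaa, xab, ...) are the defaults produced by split and are assumed here:

const path = require("path");
const promiseCSV = require("./helpers/parseCSV");
const { connectToMongo, disconnectFromMongo, bulkImportToMongo } = require("./helpers/mongodb");

// Same parser options as in partImport.js below.
const options = {
  delimiter: ";",
  noheader: true,
  headers: ["facility", "partNumber", "partName", "partDescription", "netWeight", "customsTariff"]
};

// Part files produced by `split` (default names; adjust to your own).
const parts = ["xaa", "xab", "xac", "xad", "xae", "xaf"];

async function importParts() {
  connectToMongo("autoMDM");
  for (const part of parts) {
    // Parse and import one part, then let it be garbage-collected before the next.
    const records = await promiseCSV(path.join(__dirname, "../../data", part), options);
    await bulkImportToMongo(records, "parts.js");
  }
  disconnectFromMongo();
}

importParts().catch(console.error);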
See how I solved this in the bulkImportToMongo function in mongodb.js.
Console
➜ database git:(master) ✗ node --max_old_space_size=8000 partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜ database git:(master) ✗
parseCSV.js
const csv = require("fast-csv");

// Parse a CSV file into an array of records; resolves when the stream ends.
function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("error", reject) // surface parse errors instead of silently ignoring them
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;
mongodb.js
const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

// Split the array into batches of 100 documents and insert each batch with insertMany.
// Returns a promise that resolves when every batch has been inserted.
function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let batchCount = Math.ceil(arrayToImport.length / batchSize);
  let ops = [];
  let counter = 0;
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;
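Firing every insertMany through Promise.all means all batches are in flight at the same time. If that still strains memory or the connection pool, a sequential variant can await each batch before building the next. This is my own sketch, not part of the original answer, and the name bulkImportToMongoSequential is made up:

// Alternative to bulkImportToMongo: insert one batch at a time (sketch, assumes a Node version with async/await).
async function bulkImportToMongoSequential(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let inserted = 0;
  for (let i = 0; i < arrayToImport.length; i += batchSize) {
    const batch = arrayToImport.slice(i, i + batchSize);
    await Model.insertMany(batch); // wait for this batch before building the next one
    inserted += batch.length;
  }
  return inserted;
}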
partImport.js
const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");

// Parse the whole file into memory, then import it in batches.
parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /* .then(result =>
    console.log("Total batches inserted: ", result, result.length)
  ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));
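Parsing the entire file into an array before importing is what drives the memory use. As a further option, here is a rough sketch of a streaming import that inserts batches while the CSV is still being parsed, so the full record array never exists in memory. It assumes the stream returned by csv.fromPath supports pause()/resume() like other Node readable streams, and streamImport is a hypothetical helper, not part of the answer's code:

const csv = require("fast-csv");

// Sketch: insert batches of `batchSize` records as they are parsed (assumes pause/resume on the stream).
function streamImport(filePath, options, Model, batchSize = 100) {
  return new Promise((resolve, reject) => {
    let batch = [];
    const stream = csv
      .fromPath(filePath, options)
      .on("data", record => {
        batch.push(record);
        if (batch.length >= batchSize) {
          stream.pause(); // stop reading while this batch is written
          Model.insertMany(batch)
            .then(() => {
              batch = [];
              stream.resume();
            })
            .catch(reject);
        }
      })
      .on("error", reject)
      .on("end", () => {
        // flush the final, partially filled batch (if any)
        if (batch.length === 0) return resolve();
        Model.insertMany(batch).then(resolve).catch(reject);
      });
  });
}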
It sounds like you need to do this in batches rather than all at once. The second loop runs 408,000 (24 * 17,000) times; that is probably what you should change first.
@Daniel A. White Do you mean I have to set up a cron job?
No, I mean within the program.
Do you have an example of doing it in batches?
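For illustration, here is a minimal sketch of how the loop from the question could be batched with insertMany instead of one save() per document. The variable names are taken from the question, the batch size of 100 follows the answer above, and it assumes a Mongoose version where Pollution.insertMany is available:

var BATCH_SIZE = 100; // batch size suggested in the answer above (assumed to suit this model)
var batch = [];
var pending = [];

for (var time_pos = 0; time_pos < 24; time_pos++) {
  var dataSliced = file.root.variables['pm10_conc'].readSlice(
    time_pos, time_size,
    level_pos, level_size,
    lat_from, lat_size,
    lng_from, lng_size
  );

  for (var i = 0; i < dataSliced.length; i++) {
    // Build a plain object instead of a Mongoose document; insertMany casts it on insert.
    batch.push({
      latitude: current_lat,
      longitude: current_lng,
      country: country_name,
      model: model_name,
      data_type: type_name,
      level: 0,
      datetime: date,
      pollutants: {
        pm10: {
          description: description,
          units: units,
          concentration: dataSliced[i]
        }
      }
    });

    if (batch.length === BATCH_SIZE) {
      pending.push(Pollution.insertMany(batch));
      batch = [];
    }
  }
}

// Insert whatever is left in the last, partially filled batch.
if (batch.length > 0) {
  pending.push(Pollution.insertMany(batch));
}

Promise.all(pending)
  .then(function () { console.log("All data saved"); })
  .catch(function (err) { throw err; });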