Node.js createReadStream: process only 500 rows/lines at a time
I have to read a very large CSV file, so after some Googling I learned about createReadStream. I'm writing a program that reads the CSV file's data and inserts it into MongoDB.

The flow I'm following:

1. Process the data with createReadStream (I think it reads the file row by row).
2. Store the data in an array.
3. Insert the data into the database with insertMany.

The problem right now is that the entire file is stored in an array first and then inserted into the database. I think a better approach would be to store only the first 500 rows in an array, insert them into the database, and then repeat the same steps for the next 500 records. Is it possible to do this? My code:
// The code uses fs, csv-parse, and worker_threads (workerData/parentPort),
// plus the Mongoose models, so the following requires are assumed:
const fs = require('fs');
const { parse } = require('csv-parse');
const { workerData, parentPort } = require('worker_threads');

// Arrays shared across rows; declared here so the handlers can use them
const authorName = [], companyName = [];
const authorData = [], companyData = [], userData = [],
      bookData = [], relationalData = [];
let authorId, companyID;

const test = async () => {
    const stream = fs.createReadStream(workerData)
        .pipe(parse()) // assumes csv-parse is configured so rows arrive as objects (e.g. columns: true)
        .on('data', async function(csvrow) {
            try {
                stream.pause()
                if (!authorName.includes(csvrow.author)) {
                    const author = new Author({ author: csvrow.author })
                    authorId = author._id
                    authorName.push(author.author)
                    authorData.push(author)
                }
                if (!companyName.includes(csvrow.company_name)) {
                    const company = new Company({ companyName: csvrow.company_name })
                    companyID = company._id
                    companyName.push(company.companyName)
                    companyData.push(company)
                }
                const users = new User({
                    name: csvrow.firstname,
                    dob: csvrow.dob,
                    address: csvrow.address,
                    phone: csvrow.phone,
                    state: csvrow.state,
                    zip: csvrow.zip,
                    email: csvrow.email,
                    gender: csvrow.gender,
                    userType: csvrow.userType
                })
                userData.push(users)
                const book = new Book({
                    book_number: csvrow.book_number,
                    book_name: csvrow.book_name,
                    book_desc: csvrow.book_desc,
                    user_id: users._id,
                    author_id: authorId
                })
                bookData.push(book)
                relationalData.push({
                    username: users.name,
                    author_id: authorId,
                    book_id: book._id,
                    company_id: companyID
                })
            } finally {
                stream.resume()
            }
        })
        .on('end', async function() {
            try {
                // await each insert so errors are caught here
                await Author.insertMany(authorData)
                await User.insertMany(userData)
                await Book.insertMany(bookData)
                await Company.insertMany(companyData)
                await Relational.insertMany(relationalData)
                parentPort.postMessage("true")
            } catch (e) {
                console.log(e)
                parentPort.postMessage("false")
            }
        })
}
test()
This program inserts the data into the database just fine, but I'm looking for something like this:
const stream = fs.createReadStream(workerData)
    .pipe(parse())
    .on('data', async function(csvrow, maxLineToRead: 500) {
        // whole code/logic of insert data into DB
    })
Here, maxLineToRead is a term I imagined; it doesn't actually exist.
Basically, my point is that I want to process 500 records at a time, insert them into the database, and repeat that process until the end of the file.

You can create a higher-scoped array variable in which you accumulate rows of data as the data events arrive. When you reach 500 rows, fire off a database operation to insert them. If you haven't reached 500 rows yet, just add the next row to the array and wait for more data events to come.

Then, in the end event, insert any remaining rows that are still in the higher-scoped array.

This way you insert 500 rows at a time, plus whatever number is left over at the end. Compared with inserting everything at the end, this has the advantage of spreading the database load out over the parsing.
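Stripped of the model-specific logic, a minimal sketch of just this accumulate-and-flush pattern might look like the following (saveBatch() is a hypothetical stand-in for whatever insertMany calls you need):

const BATCH_SIZE = 500;
let rows = [];                           // higher-scoped accumulator

const stream = fs.createReadStream(workerData)
    .pipe(parse())
    .on('data', async (csvrow) => {
        rows.push(csvrow);
        if (rows.length >= BATCH_SIZE) {
            stream.pause();              // stop new data events while inserting
            await saveBatch(rows);       // hypothetical: your insertMany logic
            rows = [];                   // start accumulating the next batch
            stream.resume();
        }
    })
    .on('end', async () => {
        await saveBatch(rows);           // insert the final, partial batch
    });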
Thanks for the reply, but could you help me a little more? Your answer sounds right, but I didn't quite follow it. I mean, I don't understand how to actually do this, so could you please explain it with an example or something? @DhirendraSaw I have some idea of part of what you're trying to accomplish, so I've added an implementation to the answer; there are some unanswered questions documented in the code that you'll have to fix/resolve. Thanks for the help, your logic works fine.

Below is an attempt at implementing that type of processing. There are some unknowns (documented with comments) because the description of what you're trying to achieve in certain cases is incomplete:
// Assumes the same requires, Mongoose models, and outer-scope
// authorName/companyName arrays as the code above.
const test = () => {
    return new Promise((resolve, reject) => {
        const accumulatedRows = [];

        async function processRows(rows) {
            // initialize data arrays that we will insert
            const authorData = [],
                companyData = [],
                userData = [],
                bookData = [],
                relationalData = [];
            // this code still has a problem that I don't have enough context
            // to know how to solve:
            // if authorName already contains csvrow.author, then the variable
            // authorId is never initialized, but is used later in the code.
            // This is a problem that needs to be fixed.
            // The same issue occurs for companyID.
            for (let csvrow of rows) {
                let authorId, companyID;
                if (!authorName.includes(csvrow.author)) {
                    const author = new Author({ author: csvrow.author })
                    authorId = author._id
                    authorName.push(author.author)
                    authorData.push(author)
                }
                if (!companyName.includes(csvrow.company_name)) {
                    const company = new Company({ companyName: csvrow.company_name })
                    companyID = company._id
                    companyName.push(company.companyName)
                    companyData.push(company)
                }
                let users = new User({
                    name: csvrow.firstname,
                    dob: csvrow.dob,
                    address: csvrow.address,
                    phone: csvrow.phone,
                    state: csvrow.state,
                    zip: csvrow.zip,
                    email: csvrow.email,
                    gender: csvrow.gender,
                    userType: csvrow.userType
                });
                userData.push(users)
                let book = new Book({
                    book_number: csvrow.book_number,
                    book_name: csvrow.book_name,
                    book_desc: csvrow.book_desc,
                    user_id: users._id,
                    author_id: authorId
                });
                bookData.push(book)
                relationalData.push({
                    username: users.name,
                    author_id: authorId,
                    book_id: book._id,
                    company_id: companyID
                });
            }
            // all local arrays of data are populated now for this batch,
            // so add this data to the database
            await Author.insertMany(authorData);
            await User.insertMany(userData);
            await Book.insertMany(bookData);
            await Company.insertMany(companyData);
            await Relational.insertMany(relationalData);
        }

        const batchSize = 500;  // rows to accumulate before each insert (the question asked for 500)
        const stream = fs.createReadStream(workerData)
            .pipe(parse())
            .on('data', async function(csvrow) {
                try {
                    accumulatedRows.push(csvrow);
                    if (accumulatedRows.length >= batchSize) {
                        stream.pause();
                        await processRows(accumulatedRows);
                        // clear out the rows we just processed
                        accumulatedRows.length = 0;
                        stream.resume();
                    }
                } catch (e) {
                    // calling destroy(e) will prevent leaking a stream
                    // and will trigger the error event to be called with that error
                    stream.destroy(e);
                }
            }).on('end', async function() {
                try {
                    // insert whatever rows remain from the final, partial batch
                    await processRows(accumulatedRows);
                    resolve();
                } catch (e) {
                    reject(e);
                }
            }).on('error', (e) => {
                reject(e);
            });
    });
}

test().then(() => {
    parentPort.postMessage("true");
}).catch(err => {
    console.log(err);
    parentPort.postMessage("false");
});