Node.js createReadStream to process only 500 lines/rows at a time


I have to read a very large CSV file, so after some googling I learned about createReadStream. I'm writing a program that reads the CSV file data and inserts it into MongoDB.

The process I'm following:

  • Process the data using createReadStream (I believe it reads the file line by line)
  • Store the data in an array
  • Use insertMany to insert the array into the database

The problem right now is that the whole file is stored in an array first and only then inserted into the database.

But I think the better approach would be to store only the first 500 lines/rows in an array, insert them into the database, and then repeat the same steps for the next 500 records.

Is it possible to do this? And is it the right approach?

My program:

    // Assumptions: the csv-parse package provides parse(), this file runs as a
    // worker thread, and Author, Company, User, Book, Relational are Mongoose
    // models defined elsewhere.
    const fs = require('fs')
    const { parse } = require('csv-parse')
    const { parentPort, workerData } = require('worker_threads')

    // higher-scoped state shared across all rows
    const authorName = [], authorData = []
    const companyName = [], companyData = []
    const userData = [], bookData = [], relationalData = []
    let authorId, companyID, users, book

    const test = async () => {

        const stream = fs.createReadStream(workerData)
            .pipe(parse())
            .on('data', async function(csvrow) {

                try {
                    stream.pause()

                    if (!authorName.includes(csvrow.author)) {
                        const author = new Author({ author: csvrow.author })
                        authorId = author._id
                        authorName.push(author.author)
                        authorData.push(author)
                    }

                    if (!companyName.includes(csvrow.company_name)) {
                        const company = new Company({ companyName: csvrow.company_name })
                        companyID = company._id
                        companyName.push(company.companyName)
                        companyData.push(company)
                    }

                    users = new User({
                        name: csvrow.firstname,
                        dob: csvrow.dob,
                        address: csvrow.address,
                        phone: csvrow.phone,
                        state: csvrow.state,
                        zip: csvrow.zip,
                        email: csvrow.email,
                        gender: csvrow.gender,
                        userType: csvrow.userType
                    })
                    userData.push(users)

                    book = new Book({
                        book_number: csvrow.book_number,
                        book_name: csvrow.book_name,
                        book_desc: csvrow.book_desc,
                        user_id: users._id,
                        author_id: authorId
                    })
                    bookData.push(book)

                    relationalData.push({
                        username: users.name,
                        author_id: authorId,
                        book_id: book._id,
                        company_id: companyID
                    })

                } finally {
                    stream.resume()
                }

            })
            .on('end', async function() {
                try {
                    await Author.insertMany(authorData)
                    await User.insertMany(userData)
                    await Book.insertMany(bookData)
                    await Company.insertMany(companyData)
                    await Relational.insertMany(relationalData)
                    parentPort.postMessage("true")
                } catch (e) {
                    console.log(e)
                    parentPort.postMessage("false")
                }
            })
    }

    test()
    
This program does insert the data into the database just fine, but what I'm looking for is something like this:

    const stream = fs.createReadStream(workerData)
        .pipe(parse())
        .on('data', async function(csvrow, maxLineToRead: 500) {
    
      // whole code/logic of insert data into DB
    
    })
    
So maxLineToRead is just a term I imagined.


Basically, my point is that I want to process 500 rows of data at a time, insert them into the database, and repeat that process until the end.

You can create a higher-scoped array variable where you accumulate rows of data as the data events arrive. When you get to 500 rows, fire off your database operation to insert them. If you haven't yet gotten to 500 rows, just add the next row to the array and wait for more data events to come.

Then, in the end event, insert any remaining rows still sitting in the higher-scoped array.

This way you insert 500 at a time and then whatever is left over at the end. This has an advantage over inserting everything at the end: it spreads the database load out over the parsing.
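As a side note: on Node versions where readable streams are async iterable (Node 10+), a for await loop gives you the same accumulate-and-flush pattern without the manual pause()/resume() bookkeeping, because awaiting inside the loop applies backpressure automatically. A minimal sketch, assuming the csv-parse package and a processRows() batch-insert function like the one in the code further below:

    const fs = require('fs')
    const { parse } = require('csv-parse')

    async function run(file) {
        const batch = []
        // the stream only produces the next row after the loop body finishes,
        // so the database insert naturally throttles the file reading
        for await (const csvrow of fs.createReadStream(file).pipe(parse())) {
            batch.push(csvrow)
            if (batch.length >= 500) {
                await processRows(batch)   // insert this batch into the database
                batch.length = 0           // reset for the next batch
            }
        }
        // insert whatever rows are left over at the end
        if (batch.length > 0) {
            await processRows(batch)
        }
    }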

Comments on this answer:

Thanks for the reply, but could you please help me a little more? The thing is, your answer sounds good, but I didn't follow it. I mean, I don't understand how to actually do this, so could you please explain it with an example or something?

@DhirendraSaw - I made some guesses about parts of what you're trying to accomplish and added an implementation to my answer. There are some unanswered questions documented in the code that you will have to fix/resolve.

Thanks for your help, your logic works fine.


Here's an attempt at implementing that type of processing. There are some unknowns (documented with comments) based on the incomplete description of what exactly you're trying to accomplish in some circumstances:
    const test = () => {
        return new Promise((resolve, reject) => {
            const accumulatedRows = [];
    
            async function processRows(rows) {
                // initialize data arrays that we will insert
                const authorData = [],
                    companyData = [],
                    userData = [],
                    bookData = [],
                    relationalData = [];
    
                // this code still has a problem that I don't have enough context
                // to know how to solve
                // If authorName contains csvrow.author, then the variable
                // authorId is not initialized, but is used later in the code
                // This is a problem that needs to be fixed.
                // The same issue occurs for companyID
    
                for (let csvrow of rows) {
                    let authorId, companyID;
    
                    if (!authorName.includes(csvrow.author)) {
                        const author = new Author({ author: csvrow.author })
                        authorId = author._id
                        authorName.push(author.author)
                        authorData.push(author)
                    }
    
                    if (!companyName.includes(csvrow.company_name)) {
                        const company = new Company({ companyName: csvrow.company_name })
                        companyID = company._id
                        companyName.push(company.companyName)
                        companyData.push(company)
                    }
    
                    let users = new User({
                        name: csvrow.firstname,
                        dob: csvrow.dob,
                        address: csvrow.address,
                        phone: csvrow.phone,
                        state: csvrow.state,
                        zip: csvrow.zip,
                        email: csvrow.email,
                        gender: csvrow.gender,
                        userType: csvrow.userType
                    });
                    userData.push(users)
    
                    let book = new Book({
                        book_number: csvrow.book_number,
                        book_name: csvrow.book_name,
                        book_desc: csvrow.book_desc,
                        user_id: users._id,
                        author_id: authorId
                    });
                    bookData.push(book)
    
                    relationalData.push({
                        username: users.name,
                        author_id: authorId,
                        book_id: book._id,
                        company_id: companyID
                    });
                }
                // all local arrays of data are populated now for this batch
                // so add this data to the database
                await Author.insertMany(authorData);
                await User.insertMany(userData);
                await Book.insertMany(bookData);
                await Company.insertMany(companyData);
                await Relational.insertMany(relationalData);
            }
    
            const batchSize = 50;    // note: the question asked for batches of 500; adjust to taste
            const stream = fs.createReadStream(workerData)
                .pipe(parse())
                .on('data', async function(csvrow) {
                    try {
                        accumulatedRows.push(csvrow);
                        if (accumulatedRows.length >= batchSize) {
                            stream.pause();
                            await processRows(accumulatedRows);
                            // clear out the rows we just processed
                            accumulatedRows.length = 0;
                            stream.resume();
                        }
                    } catch (e) {
                        // calling destroy(e) will prevent leaking a stream
                        // and will trigger the error event to be called with that error
                        stream.destroy(e);
                    }
                }).on('end', async function() {
                    try {
                        await processRows(accumulatedRows);
                        resolve();
                    } catch (e) {
                        reject(e);
                    }
                }).on('error', (e) => {
                    reject(e);
                });
        });
    }
    
    test().then(() => {
        parentPort.postMessage("true");
    }).catch(err => {
        console.log(err);
        parentPort.postMessage("false");
    });
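
As for the unresolved authorId/companyID problem documented in the comments inside processRows(): if the intent is that every row mentioning an already-seen author or company should reuse the _id created for its first occurrence, one hypothetical fix is to cache the ids by name in a Map instead of only tracking the names in an array. The helper names getAuthorId/getCompanyId below are illustrative, not from the original code, and the reuse-the-first-_id behavior is an assumption:

    // caches shared across all batches (these would replace the
    // authorName/companyName arrays in the code above)
    const authorIdByName = new Map()
    const companyIdByName = new Map()

    function getAuthorId(csvrow, authorData) {
        if (!authorIdByName.has(csvrow.author)) {
            // first time we've seen this author: create it and queue the insert
            const author = new Author({ author: csvrow.author })
            authorIdByName.set(csvrow.author, author._id)
            authorData.push(author)
        }
        return authorIdByName.get(csvrow.author)
    }

    function getCompanyId(csvrow, companyData) {
        if (!companyIdByName.has(csvrow.company_name)) {
            // first time we've seen this company: create it and queue the insert
            const company = new Company({ companyName: csvrow.company_name })
            companyIdByName.set(csvrow.company_name, company._id)
            companyData.push(company)
        }
        return companyIdByName.get(csvrow.company_name)
    }

Inside the for...of loop in processRows() you would then write const authorId = getAuthorId(csvrow, authorData) and const companyID = getCompanyId(csvrow, companyData), so both variables are initialized on every row rather than only on the first occurrence.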