Javascript 在所有线程完成运行后,如何运行代码?
我有一个多线程的网络爬虫,可以下载一个网站并将其存储在数据库中(大约需要4分钟)。为了加快爬行速度,我使用了node.js cluster模块,但我有一个问题:我想在所有线程完成它们的进程之后(而不是在它们启动之后)再进入while循环的下一次迭代。我如何确保所有线程都已结束,然后再继续?以下是主while循环中的相关代码:Javascript 在所有线程完成运行后,如何运行代码?,javascript,node.js,multithreading,Javascript,Node.js,Multithreading,我有一个多线程的网络爬虫,可以下载一个网站并将其存储在数据库中(大约需要4分钟)。为了加快爬行速度,我使用了node.js cluster模块,但我有一个问题:我想在所有线程完成它们的进程之后(而不是在它们启动之后)再进入while循环的下一次迭代。我如何确保所有线程都已结束,然后再继续?以下是主while循环中的相关代码: while (indexSize !== indexSizeLimit) { const queueLength = queue.length;
// Crawl the queue in passes until enough pages have been indexed.
//
// Each pass splits `queue` into one contiguous chunk per worker, forks the
// workers, and then waits for every worker to disconnect before the next pass
// starts. Workers report how many pages they indexed over the cluster IPC
// channel, because a worker's own copy of `indexSize` disappears when it exits.
//
// `<` is used instead of `!==`: a pass may index several pages at once, so the
// count can step past the limit without ever being exactly equal to it.
while (indexSize < indexSizeLimit) {
  const queueLength = queue.length;
  const numberOfThreads = Math.min(numberOfCPUs, queueLength);

  // Chunk sizes differ by at most one: the first `queueLength % numberOfThreads`
  // chunks receive the extra item. An empty queue simply yields no chunks.
  const queueChunks = [];
  let start = 0;
  for (let i = 0; i < numberOfThreads; i++) {
    const extra = i < queueLength % numberOfThreads ? 1 : 0;
    const end = start + Math.floor(queueLength / numberOfThreads) + extra;
    queueChunks.push(queue.slice(start, end));
    start = end;
  }

  if (cluster.isMaster) {
    // Nothing left to crawl: stop instead of looping without any workers.
    if (numberOfThreads === 0) {
      break;
    }

    const workersDone = [];
    for (let i = 0; i < numberOfThreads; i++) {
      // Worker ids keep increasing over the lifetime of the master, so the
      // chunk index is handed over explicitly through the environment.
      const worker = cluster.fork({ QUEUE_CHUNK_INDEX: String(i) });
      workersDone.push(new Promise(function (resolve) {
        worker.on("message", function (report) {
          if (report && typeof report.indexed === "number") {
            indexSize += report.indexed;
          }
        });
        worker.once("disconnect", resolve);
      }));
    }
    // This is what keeps the loop from racing ahead of its workers.
    await Promise.all(workersDone);
    // NOTE(review): `queue` is not replaced here yet (see the TODO comments in
    // the worker branch), so every pass re-crawls the same URLs.
  } else {
    try {
      const chunk = queueChunks[Number(process.env.QUEUE_CHUNK_INDEX)] || [];
      let indexed = 0;
      await Promise.all(chunk.map(async function (url) {
        const webcode = await request(url);
        if (webcode === "Failure") {
          return;
        }
        const document = new Document(url, webcode);
        const hrefs = document.hrefs();
        const hrefsQuery = Query(hrefs);
        // Also make sure it is not included in indexed webpages.
        hrefsQuery.individualize();
        // Do something with the individualized hrefs in regards to maintaining a queue in the database.
        // And in adding a nextQueue which to replace the queue in code with.
        await document.save();
        indexed += 1;
      }));
      // Wait until the report has been handed to the IPC channel before exiting.
      await new Promise(function (resolve) {
        process.send({ indexed: indexed }, resolve);
      });
    } catch (error) {
      // A failed request/save must still end the worker, otherwise the master
      // would wait for its disconnect forever.
      console.error(error);
      process.exit(1);
    }
    process.exit(0);
  }
}
while(indexSize!==indexSizeLimit){
const queueLength=queue.length;
const numberOfThreads=Math.min(numberOfCPUs,queueLength);
const threadAllocations=Array(numberOfThreads).fill(0);
let queuesAllocated=0;
const queueChunks=[];
function fillQueueChunks(){
loop:while(true){
for(let i=0;i
用 Promise 来包装线程。可以在父线程(主进程)中监听 disconnect(断开连接)事件,当断开连接的数量等于线程的数量时,即可 resolve 该 Promise。
这是我的代码:
// Crawl the queue in passes until enough pages have been indexed.
//
// Each pass splits `queue` into one contiguous chunk per worker, forks the
// workers, waits for all of them to disconnect, and then continues with the
// links they discovered. Workers send their results back over the cluster IPC
// channel: anything a worker writes to its own `indexSize` / `nextQueue` is
// lost when it exits, so the master has to collect the results itself.
//
// `<` is used instead of `!==`: a pass may index several pages at once, so the
// count can step past the limit without ever being exactly equal to it.
while (indexSize < indexSizeLimit) {
  let nextQueue = [];
  const queueLength = queue.length;
  const numberOfThreads = Math.min(numberOfCPUs, queueLength);

  // queueChunks: [[{_id: ..., ...}], [...], ...]
  // Chunk sizes differ by at most one: the first `queueLength % numberOfThreads`
  // chunks receive the extra item. An empty queue simply yields no chunks.
  const queueChunks = [];
  let start = 0;
  for (let i = 0; i < numberOfThreads; i++) {
    const extra = i < queueLength % numberOfThreads ? 1 : 0;
    const end = start + Math.floor(queueLength / numberOfThreads) + extra;
    queueChunks.push(queue.slice(start, end));
    start = end;
  }

  if (cluster.isMaster) {
    // Nothing left to crawl: stop instead of looping without any workers.
    if (numberOfThreads === 0) {
      break;
    }

    const workersDone = [];
    for (let i = 0; i < numberOfThreads; i++) {
      // Worker ids keep increasing over the lifetime of the master, so the
      // chunk index is handed over explicitly through the environment.
      const worker = cluster.fork({ QUEUE_CHUNK_INDEX: String(i) });
      // Listeners are attached per worker, so nothing accumulates on `cluster`
      // from one pass to the next.
      workersDone.push(new Promise(function (resolve) {
        worker.on("message", function (report) {
          if (!report || typeof report.indexed !== "number" || !Array.isArray(report.hrefs)) {
            return;
          }
          indexSize += report.indexed;
          nextQueue = nextQueue.concat(report.hrefs);
        });
        worker.once("disconnect", function () {
          resolve("Queue Processed");
        });
      }));
    }
    await Promise.all(workersDone);
  } else {
    try {
      const queueJob = queueChunks[Number(process.env.QUEUE_CHUNK_INDEX)] || [];
      let indexed = 0;
      let discovered = [];
      await Promise.all(queueJob.map(async function (queueItem) {
        const url = queueItem._id;
        const webcode = await request(url);
        if (webcode !== "Failure") {
          const document = Document(url, webcode);
          let hrefs = document.hrefs();
          const hrefsQuery = Query(hrefs);
          await document.save();
          indexed += 1;
          hrefs = hrefsQuery.individualize();
          // The counts must be awaited: filtering against the pending promise
          // would discard every link.
          const hrefIncidences = await Promise.all(hrefs.map(async function (href) {
            return Site.countDocuments({
              url: href
            });
          }));
          hrefs = hrefs.filter(function (_, i) {
            return hrefIncidences[i] === 0;
          }).map(function (href) {
            return {
              _id: href
            };
          });
          // NOTE(review): two pages processed concurrently may discover the same
          // new link; confirm how Queue.insertMany treats duplicate _id values.
          await Queue.insertMany(hrefs);
          discovered = discovered.concat(hrefs);
        }
        await Queue.deleteOne({
          _id: url
        });
      }));
      // Wait until the report has been handed to the IPC channel before exiting.
      await new Promise(function (resolve) {
        process.send({ indexed: indexed, hrefs: discovered }, resolve);
      });
    } catch (error) {
      // A failed request/query must still end the worker, otherwise the master
      // would wait for its disconnect forever.
      console.error(error);
      process.exit(1);
    }
    process.exit(0);
  }

  // Only the master reaches this point; workers have exited above.
  queue = nextQueue;
}
while(indexSize!==indexSizeLimit){
let nextQueue=[];
const queueLength=queue.length;
const numberOfThreads=Math.min(numberOfCPUs,queueLength);
const threadAllocations=Array(numberOfThreads).fill(0);
let queuesAllocated=0;
// queueChunks: [[{_id: ..., ...}], [...], ...]
const queueChunks=[];
function fillQueueChunks(){
loop:while(true){
for(let i=0;i
您应该查看瓶颈: