Javascript 如何锁定 websocket(或状态)连接,直到刮取器(scraper)运行结束
我有多个刮刀,如下所示:Javascript 如何锁定websocket(或状态)连接,直到刮板';结束了,javascript,node.js,web-scraping,websocket,socket.io,Javascript,Node.js,Web Scraping,Websocket,Socket.io,我有多个刮刀,如下所示: await scraper1.run await scraper2.run // etc 为了提高性能和响应时间,我使用websocket,并将套接字连接向下传递到每个刮板,并为每个单个项目(结果)发出消息 上下文:当用户刷新时,多个套接字连接使刮取器运行多次,数据重复。我使用mongodb阻止了复制,但性能问题仍然存在,因为刮取器会一直运行,直到结果就绪,然后我会检查数据库 问题:如何锁定或防止刮刀多次运行,并等待每个刮刀使用websocket完成 我可以向您提出下
await scraper1.run
await scraper2.run
// etc
为了提高性能和响应时间,我使用websocket,并将套接字连接向下传递到每个刮板,并为每个单个项目(结果)发出消息
上下文:当用户刷新时,多个套接字连接使刮取器运行多次,数据重复。我使用mongodb阻止了复制,但性能问题仍然存在,因为刮取器会一直运行,直到结果就绪,然后我会检查数据库
问题:如何锁定或防止刮取器多次运行,并通过 websocket 等待每个刮取器完成? 我可以向您提出如下解决方案(我没有测试过它,但我想您可以通过下面的示例了解我试图实现的目标): 为了简化问题,这个例子写得很简单,在性能/体系结构方面并不是最好的。此解决方案实现: 场景1(有人第一次询问1)
希望这有帮助。让我看看我是否理解正确:1. 用户通过 websocket 连接,开始刮取。2. 随着刮取的进行,用户通过同一个套接字收到结果。3. 在某个时刻,用户刷新浏览器窗口,这会断开所有 websocket 连接。4. 刷新完成后将建立新套接字,但用户不再看到更新(旧套接字丢失)。5. 用户尝试重新启动相同的刮取。是这样吗?您试图解决的问题是:1. 您不希望出现重复的刮取;2. 重新连接后,用户是否应继续看到其正在运行的刮取?
const express = require('express')
const app = express()
const http = require('http').Server(app)
const cors = require('cors')
const puppeteer = require('puppeteer')
const io = require('socket.io')(http)
const mongoose = require('mongoose')
const _ = require('lodash')
const scraper1 = require('./scraper1')
const scraper2 = require('./scraper2')
mongoose.connect("mongodb://localhost:27017/test");

/**
 * Launch a visible (non-headless) Puppeteer browser and, once it is ready,
 * attach the socket.io handlers that run both scrapers for every 'search' event.
 * Each scraper receives the shared browser, the requesting socket and the query.
 */
const startScraperServer = async () => {
  const browser = await puppeteer.launch({ headless: false });

  io.on('connection', async (socket) => {
    socket.on('search', async (query) => {
      // Placeholder from the original: look up an existing document for the
      // user's IP address and return early; otherwise fall through to scraping.
      await scraper1.run(browser, socket, query);
      await scraper2.run(browser, socket, query);
    });
  });
};

// Startup failures (browser launch, handler registration) are only logged.
startScraperServer().catch((e) => {
  console.log(e);
});

http.listen(3000);
'use strict';
/**
 * In-memory map from query key (see getQueryKey) to that query's state string.
 */
const queryState = {};
/**
 * Build the lookup key used for a query in the state and observer maps.
 * The key is the base64 encoding of the query; a digest such as sha256
 * could be used instead.
 * @param {String} query
 * @return {String} base64-encoded key
 */
const getQueryKey = (query) => Buffer.from(query).toString('base64');
/**
 * Look up the stored state of a query.
 * @param {String} query
 * @return {?String} the stored state ('PENDING' or 'DONE'), or null when
 *   nothing (or a falsy value) is stored for the query
 */
const getQueryState = (query) => {
  return queryState[getQueryKey(query)] || null;
};
/**
 * Register a query and mark it as pending.
 * Any state already stored for the same query is overwritten.
 * @param {String} query
 * @return {String} the state that was stored ('PENDING')
 */
const addQuery = (query) => {
  const pendingState = 'PENDING';
  queryState[getQueryKey(query)] = pendingState;
  return pendingState;
};
/**
 * Callbacks waiting on pending queries, so that socket connections can be
 * notified once a query is done.
 * Maps a query key (see getQueryKey) to an array of callbacks.
 */
const observers = {};
/**
 * Register a callback to be invoked when the given query is done.
 *
 * Callbacks are appended in registration order. The original condition was
 * inverted: the first registration tried to spread `undefined` (TypeError)
 * and later registrations replaced the existing list instead of extending it.
 * @param {String} query
 * @param {Function} callback invoked by notifyObservers with the query data
 */
const addObserver = (query, callback) => {
  const key = getQueryKey(query);
  // Start from an empty list when this is the first observer for the query.
  const registered = observers[key] || [];
  observers[key] = [...registered, callback];
};
/**
 * Invoke every callback registered for the query with the query's data,
 * then forget those callbacks.
 *
 * The observer list is removed before the callbacks run so that each observer
 * is notified at most once and finished queries do not accumulate callbacks.
 * Does nothing (apart from fetching the data) when no observer is registered.
 *
 * NOTE(review): getDataFromQuery is not defined in the visible part of this
 * file; it has to be provided elsewhere — confirm.
 * @param {String} query
 */
const notifyObservers = (query) => {
  const key = getQueryKey(query);
  const callbacks = observers[key] || [];
  // TODO: get query data scrapper from a cache / database / etc
  const data = getDataFromQuery(query);
  // Clear only after the data was obtained, so a failing lookup keeps the
  // observers registered for a later attempt.
  delete observers[key];
  callbacks.forEach((callback) => {
    callback(data);
  });
};
/**
 * Mark a query as finished.
 * Precondition: the query was previously registered with addQuery.
 * @param {String} query
 * @return {String} the state that was stored ('DONE')
 */
const endQuery = (query) => {
  const doneState = 'DONE';
  queryState[getQueryKey(query)] = doneState;
  return doneState;
};
/**
 * Register the 'search' handler on every new socket connection.
 *
 * A query is scraped by the first request only: that request marks it PENDING
 * and runs both scrapers; requests for the same query arriving meanwhile
 * register an observer; requests arriving after completion take the DONE branch.
 *
 * NOTE(review): `browser` is not declared in the visible part of this snippet;
 * presumably it is the Puppeteer browser launched at startup — confirm.
 */
io.on('connection', async function (socket) {
  socket.on('search', async function (query) {
    const state = getQueryState(query);
    if (state === null) {
      // Claim the query synchronously, before the first await, so that other
      // 'search' events for it observe PENDING instead of scraping again.
      addQuery(query);
      try {
        await scraper1.run(browser, socket, query);
        await scraper2.run(browser, socket, query);
      } catch (err) {
        // A failed scrape must not leave the query stuck in PENDING (every
        // later request would wait forever) nor escape as an unhandled
        // rejection. Forget the query so that a later request can retry it;
        // observers already registered stay in place and are notified by
        // that retry.
        delete queryState[getQueryKey(query)];
        console.error(err);
        return;
      }
      endQuery(query);
      // store scrapper data in cache / database / etc and
      // socket send scraperData to the user
      // notify pending queries to send data scrapper
      notifyObservers(query);
    } else if (state === 'PENDING') {
      // add callback to return data to the user
      addObserver(query, (scraperData) => {
        // socket send scraperData to the user
      });
    } else {
      // socket send scraperData to the user
    }
  });
});