Javascript 快报:想在回复后完成一项大任务吗
我创建了一个程序来提取 7GB 的 zip 文件,单独运行时工作正常。
// Streams a zip archive from S3, expands each entry, and re-uploads the
// extracted files to "hoge/unzip/<entry path>" in the same bucket.
// Resolves once every counted entry has finished uploading; rejects on
// download, parse, or upload errors.
//
// NOTE(review): depends on module-level state declared elsewhere in this
// file — the `s3` client, the `unzip` parser, and the counters
// `currentFileCount`, `uploadedFileCount`, `allFileCount`. Those counters
// must be reset before each call, or repeated/concurrent invocations will
// interfere with each other — confirm against the caller.
const unzipUpload = path => {
  return new Promise((resolve, reject) => {
    let rStream = s3.getObject({Bucket: 'bucket', Key: path})
      .createReadStream()
      .pipe(unzip.Parse())
      .on('entry', function (entry) {
        if (entry.path.match(/__MACOSX/) === null) {
          // Back-pressure: stop reading the archive while more than 10
          // uploads are still in flight, to bound memory usage.
          if (currentFileCount - uploadedFileCount > 10) rStream.pause()
          currentFileCount += 1
          const fileName = entry.path;
          let up = entry.pipe(uploadFromStream(s3, fileName))
          up.on('uploaded', e => {
            uploadedFileCount += 1
            console.log(currentFileCount, uploadedFileCount)
            // Resume reading once the in-flight window has drained.
            if (currentFileCount - uploadedFileCount <= 10) rStream.resume()
            if (uploadedFileCount === allFileCount) resolve()
          }).on('error', e => {
            // Propagate the actual upload error instead of rejecting
            // with undefined.
            reject(e)
          })
        } else {
          // BUG FIX: skipped entries (__MACOSX metadata) must still be
          // drained, otherwise the parse stream stalls and 'finish'
          // never fires. The original instead called autodrain() on
          // already-piped entries, where it is a no-op.
          entry.autodrain()
        }
      }).on('error', e => {
        console.log("unzip error")
        reject(e)
      }).on('finish', e => {
        // All entries have been seen; record the final count so the
        // last 'uploaded' handler knows when to resolve.
        allFileCount = currentFileCount
      })
    rStream.on('error', e => {
      console.log(e)
      reject(e)
    })
  })
}
// Returns a PassThrough stream that serves as the Body of an S3 upload:
// whatever the caller pipes in is streamed straight to S3 at
// "hoge/unzip/<fileName>". Emits 'uploaded' (with the S3 response data)
// on success and 'error' (with the error) on failure.
function uploadFromStream(s3, fileName) {
  const pass = new stream.PassThrough();
  const params = {Bucket: "bucket", Key: "hoge/unzip/" + fileName, Body: pass};
  const request = s3.upload(params, function (err, data) {
    // BUG FIX: forward the error/result objects to the listeners. The
    // original emitted 'error' with no argument, losing the failure
    // cause (handlers received undefined).
    if (err) {
      pass.emit('error', err)
    } else {
      pass.emit('uploaded', data)
    }
  })
  request.on('httpUploadProgress', progress => {
    console.log(progress)
  })
  return pass
}
任务1。当Express.js收到请求时,它将立即返回响应
任务2。返回响应后,根据请求从s3下载zip文件。然后解压缩。完成后将其上载到s3
任务3。如果您同时收到多个请求,我希望按顺序处理任务2。因为同时处理它会占用大量内存
此代码对应于任务2,单独运行时工作正常:
// Streams a zip archive from S3, expands each entry, and re-uploads the
// extracted files to "hoge/unzip/<entry path>" in the same bucket.
// Resolves once every counted entry has finished uploading; rejects on
// download, parse, or upload errors.
//
// NOTE(review): depends on module-level state declared elsewhere in this
// file — the `s3` client, the `unzip` parser, and the counters
// `currentFileCount`, `uploadedFileCount`, `allFileCount`. Those counters
// must be reset before each call, or repeated/concurrent invocations will
// interfere with each other — confirm against the caller.
const unzipUpload = path => {
  return new Promise((resolve, reject) => {
    let rStream = s3.getObject({Bucket: 'bucket', Key: path})
      .createReadStream()
      .pipe(unzip.Parse())
      .on('entry', function (entry) {
        if (entry.path.match(/__MACOSX/) === null) {
          // Back-pressure: stop reading the archive while more than 10
          // uploads are still in flight, to bound memory usage.
          if (currentFileCount - uploadedFileCount > 10) rStream.pause()
          currentFileCount += 1
          const fileName = entry.path;
          let up = entry.pipe(uploadFromStream(s3, fileName))
          up.on('uploaded', e => {
            uploadedFileCount += 1
            console.log(currentFileCount, uploadedFileCount)
            // Resume reading once the in-flight window has drained.
            if (currentFileCount - uploadedFileCount <= 10) rStream.resume()
            if (uploadedFileCount === allFileCount) resolve()
          }).on('error', e => {
            // Propagate the actual upload error instead of rejecting
            // with undefined.
            reject(e)
          })
        } else {
          // BUG FIX: skipped entries (__MACOSX metadata) must still be
          // drained, otherwise the parse stream stalls and 'finish'
          // never fires. The original instead called autodrain() on
          // already-piped entries, where it is a no-op.
          entry.autodrain()
        }
      }).on('error', e => {
        console.log("unzip error")
        reject(e)
      }).on('finish', e => {
        // All entries have been seen; record the final count so the
        // last 'uploaded' handler knows when to resolve.
        allFileCount = currentFileCount
      })
    rStream.on('error', e => {
      console.log(e)
      reject(e)
    })
  })
}
// Returns a PassThrough stream that serves as the Body of an S3 upload:
// whatever the caller pipes in is streamed straight to S3 at
// "hoge/unzip/<fileName>". Emits 'uploaded' (with the S3 response data)
// on success and 'error' (with the error) on failure.
function uploadFromStream(s3, fileName) {
  const pass = new stream.PassThrough();
  const params = {Bucket: "bucket", Key: "hoge/unzip/" + fileName, Body: pass};
  const request = s3.upload(params, function (err, data) {
    // BUG FIX: forward the error/result objects to the listeners. The
    // original emitted 'error' with no argument, losing the failure
    // cause (handlers received undefined).
    if (err) {
      pass.emit('error', err)
    } else {
      pass.emit('uploaded', data)
    }
  })
  request.on('httpUploadProgress', progress => {
    console.log(progress)
  })
  return pass
}
有没有办法最小化内存并实现这一点?根据您的场景,我建议使用 RabbitMQ 队列机制。它将这样帮助您:任务1仍由 Express 完成,但在返回响应之前,先向 RMQ 队列发布一条 JSON 消息,其中包含任务2和任务3所需的信息。任务2和任务3则交给 RMQ 工作进程(worker)处理:消息一经发布,只要有空闲的工作进程,它就会取出消息并开始处理。如果您的 Node 服务同时收到多个请求,只需把每个请求对应的消息都发布到 RMQ 即可。
您将获得的优势是:请求上下文/消息永远不会丢失,RMQ 会为您的消息排队;如果想提高速度,可以生成多个工作进程。最具伸缩性的方法是创建一个工作队列,再由一个或多个子进程处理排队的项目:这样 Express 进程可以保持空闲以服务 Web 请求,而子进程负责 S3 下载和解压工作。如果需要,可以把队列设计为一次只处理一个项目。此方案并不限于 RabbitMQ——最常见的 Node.js 方案可能是基于 Redis 的队列,但任何第三方数据库/代理都可以;由于他们已经在使用 S3(即 AWS),Amazon SQS 也会是一个可行的选项。