JavaScript / Node.js:从服务器下载 10'000+ 张图像
我需要从服务器下载10000多张图片。为此,我写了这个小脚本Javascript NodeJS:下载10';000+;来自服务器的图像,javascript,node.js,file,download,Javascript,Node.js,File,Download,我需要从服务器下载10000多张图片。为此,我写了这个小脚本 const http = require('http') const fs = require('fs') const opt = { agent: new http.Agent({ keepAlive: true, maxSockets: 5 }), headers: { 'user-agent': 'foo' } } for (let i = 0; i < 10000; i++)
const http = require('http')
const fs = require('fs')

// Tunables for the bulk download.
const TOTAL_IMAGES = 10000 // image indexes 0 .. TOTAL_IMAGES - 1
const MAX_CONCURRENT = 5 // downloads in flight at once (matches the agent's socket cap)
const MAX_ATTEMPTS = 5 // tries per image before giving up on it
const RETRY_DELAY_MS = 1000 // base delay; multiplied by the attempt number

const opt = {
  agent: new http.Agent({
    keepAlive: true,
    maxSockets: MAX_CONCURRENT
  }),
  headers: {
    'user-agent': 'foo'
  }
}

// createWriteStream fails with ENOENT if the target directory is missing.
fs.mkdirSync('resource', { recursive: true })

/**
 * Download image number `i` into `resource/<i>.png`.
 *
 * Network or file errors (e.g. ECONNRESET, "socket hang up") are retried up
 * to MAX_ATTEMPTS times with a linearly growing delay instead of crashing the
 * process. Non-200 responses are skipped without retry.
 *
 * @param {number} i - image index used in both the URL and the file name
 * @param {number} attempt - 1-based attempt counter
 * @param {Function} done - called exactly once when this image is finished
 *   (saved, skipped, or abandoned)
 */
function download (i, attempt, done) {
  let settled = false

  // Several events can report the same failure; only the first one counts.
  const settle = (err) => {
    if (settled) return
    settled = true
    if (!err) {
      done()
      return
    }
    if (attempt < MAX_ATTEMPTS) {
      setTimeout(() => download(i, attempt + 1, done), RETRY_DELAY_MS * attempt)
      return
    }
    console.error(`${i}: giving up after ${attempt} attempts (${err.message})`)
    done()
  }

  http.get(`http://www.example.com/${i}.png`, opt, (res) => {
    console.log(i)
    if (res.statusCode !== 200) {
      // The body must be consumed, otherwise the keep-alive socket is never
      // released back to the agent.
      res.resume()
      settle()
      return
    }

    const file = fs.createWriteStream(`resource/${i}.png`)
    const abort = (err) => {
      res.destroy()
      file.destroy() // a retry reopens the file and truncates the partial data
      settle(err)
    }

    res.on('error', abort)
    // A connection dropped mid-body closes the response without completing it.
    res.on('close', () => {
      if (!res.complete) abort(new Error('response closed before completion'))
    })
    file.on('error', abort)
    file.on('finish', () => settle())
    res.pipe(file)
  }).on('error', settle) // without this listener a socket error is an uncaught exception
}

// Simple worker pool: each finished download starts the next pending index,
// so at most MAX_CONCURRENT requests exist at any time.
let nextIndex = 0
function startNext () {
  if (nextIndex >= TOTAL_IMAGES) return
  const i = nextIndex++
  download(i, 1, startNext)
}

for (let w = 0; w < MAX_CONCURRENT; w++) {
  startNext()
}
consthttp=require('http'))
常量fs=require('fs')
常数opt={
代理:新的http.agent({
基帕利夫:是的,
maxSockets:5
}),
标题:{
“用户代理”:“foo”
}
}
for(设i=0;i<10000;i++){
http.get(`http://www.example.com/${i}.png`,opt,(res)=>{
控制台日志(i)
如果(res.statusCode!==200)返回
res.pipe(fs.createWriteStream(`resource/${i}.png`))
})
}
问题是,过了一段时间,我得到了错误:read ECONNRESET
或错误:套接字挂起
我已经设置了keepAlive:true
和maxSockets:5
我唯一能想到的解释是:服务器有某种 DoS 防护机制,因而拒绝了我的请求。
你知道怎么解决这个问题吗?我需要实现某种“冷却”(限速)机制吗?

首先,您需要用变量来记录已经返回的图像以及返回的数量:
// Lookup table keyed by image index; a truthy entry marks that image as done.
const returnedImages = {};
// Running count of images returned so far.
let returnedCount = 0;
接下来,您需要将该循环包装到函数中,并跳过已经下载的图像
/**
 * Start a download for every image index in [0, 10000) that is not yet
 * marked in `returnedImages`.
 *
 * The function only *starts* the requests and returns immediately; results
 * arrive later through the callbacks. An image is marked in `returnedImages`
 * and counted in `returnedCount` only once its file has been completely
 * written, so a later call retries anything that failed or was cut off.
 */
var fetchImgs = function() {
  for (let i = 0; i < 10000; i++) {
    if (returnedImages[i]) continue

    http.get(`http://www.example.com/${i}.png`, opt, (res) => {
      console.log(i)
      if (res.statusCode !== 200) {
        // Drain the body so the keep-alive socket is released to the agent.
        res.resume()
        return
      }

      const file = fs.createWriteStream(`resource/${i}.png`)
      const abort = (err) => {
        console.error(`${i}: ${err.message}`)
        res.destroy()
        file.destroy() // the next pass reopens and truncates the partial file
      }

      res.on('error', abort)
      file.on('error', abort)
      file.on('finish', () => {
        // Overlapping passes may download the same index twice; count it once.
        if (returnedImages[i]) return
        returnedImages[i] = true
        returnedCount++
      })
      res.pipe(file)
    }).on('error', (err) => {
      // Without a listener, ECONNRESET / "socket hang up" is an uncaught
      // exception. The index stays unmarked and is retried on the next call.
      console.error(`${i}: ${err.message}`)
    })
  }
}
var fetchImgs=function(){
for(设i=0;i<10000;i++){
如果(返回图像[i])继续;
http.get(`http://www.example.com/${i}.png`,opt,(res)=>{
控制台日志(i)
如果(res.statusCode!==200)返回
res.pipe(fs.createWriteStream(`resource/${i}.png`))
返回计数++
})
}
}
然后可以反复调用该函数(并非真正的递归),直到所有图像都已返回,并用 try/catch 处理出错的情况:
// Retry driver: keeps calling fetchImgs() until 10000 images have returned.
//
// fetchImgs() only starts requests; their callbacks run later on the event
// loop. A synchronous `while` loop would therefore never let a single
// response be processed (and would queue new requests forever), so the
// progress check is done from a timer instead.
//
// returnedCount is deliberately NOT reset between passes: the threshold below
// is cumulative, and resetting would make it unreachable once already
// downloaded images are skipped.
var RETRY_POLL_MS = 5000 // how often progress is checked
var countAtLastPoll = returnedCount
var passMadeProgress = false

// Begin one pass over the images.
function startPass() {
  passMadeProgress = false
  try {
    fetchImgs()
  } catch (e) {
    // Only synchronous failures while issuing the requests land here.
    console.error(e)
    if (returnedCount === 0) throw new Error("Cannot fetch anymore")
  }
}

startPass()

var retryTimer = setInterval(function () {
  if (returnedCount >= 10000) {
    clearInterval(retryTimer)
    return
  }
  if (returnedCount !== countAtLastPoll) {
    // Responses are still arriving; let the current pass continue.
    countAtLastPoll = returnedCount
    passMadeProgress = true
    return
  }
  // No new image since the last poll: the current pass is treated as over.
  if (!passMadeProgress) {
    // A whole pass returned nothing - assume the server refuses to serve us.
    clearInterval(retryTimer)
    throw new Error("Cannot fetch anymore")
  }
  startPass()
}, RETRY_POLL_MS)
while(returnedCount<10000){
试一试{
returnedCount=0
fetchImgs()
}捕获(e){
如果(returnedCount==0)抛出新错误(“无法再提取”)
}
}
catch 块中的 if 用来检查是否一张图像都没有返回;如果确实一张都没有返回,就抛出错误(此时可以认为服务器已不再允许来自您 IP 的连接)。
这不是万无一失的,但你可以根据自己的喜好进行调整:)
希望这有帮助。

您可以使用 @Werlious 提到的批处理方式;或者,我建议您把所有的 CPU 核心都利用起来。由于每一次下载都是原子且幂等的操作,您可以采用下面这种并行方式,它也能扩展到更大的数量。
// parent.js
// Splits the download list across child processes (at most one per CPU core)
// and waits until every child has reported back.
var child_process = require('child_process');
var numchild = require('os').cpus().length;
var done = 0;
var filesListJSON = [{
    "file_name": "Blessed_Titles.jpg",
    "url": "https://i.imgur.com/FRDibHa.jpg",
    "downloadId": "6r44r4k340rvvr"
}, {
    "file_name": "Blessed_Titles2.jpg",
    "url": "https://i.imgur.com/FRDibHa2.jpg",
    "downloadId": "6r44r4k340rvvr"
}, {
    "file_name": "Blessed_Titles3.jpg",
    "url": "https://i.imgur.com/FRDibHa3.jpg",
    "downloadId": "6r44r4k340rvvr"
}];

/**
 * Split `list` into at most `parts` contiguous chunks of near-equal size.
 *
 * @param {Array} list - items to distribute
 * @param {number} parts - maximum number of chunks to produce
 * @returns {Array<Array>} non-empty chunks; an empty array for an empty list
 */
function splitIntoChunks(list, parts) {
    if (list.length === 0) {
        return [];
    }
    // The size is clamped to 1 so the loop below always advances.
    var size = Math.max(1, Math.ceil(list.length / Math.max(1, parts)));
    var result = [];
    for (var start = 0; start < list.length; start += size) {
        result.push(list.slice(start, start + size));
    }
    return result;
}

// split the array into at most one chunk per available core
var chunks = splitIntoChunks(filesListJSON, numchild);

// Fork one child per chunk, so no child is started without work.
chunks.forEach(function (chunk) {
    var child = child_process.fork('./child');
    //send the chunk of the list to respective thread.
    child.send(chunk);
    //ps please check the count and logic for yourself I have not tested this.
    child.on('message', function (message) {
        console.log('[parent] received message from child:', message);
        done++;
        if (done === chunks.length) {
            console.log('[parent] received all results');
            // All children have reported back; follow-up work goes here.
        }
    });
});
// child.js
// Worker side: receives one chunk of the file list from the parent,
// downloads it, reports the result and then closes the IPC channel.
process.on('message', function (list) {
    // Log the received payload (the handler's parameter is `list`).
    console.log('[child] received message from server:', list);
    downloadFiles(list, function (done) {
        console.log("Done downloading files : " + list.length);
        process.send({
            child: process.pid,
            result: done
        });
        // Closing the channel lets the child exit once nothing else is pending.
        process.disconnect();
    });
});

/**
 * Placeholder for the actual download logic.
 *
 * @param {Array} list - file descriptors received from the parent
 * @param {Function} cb - to be called with the result once all files are done
 */
function downloadFiles(list, cb) {
    //loop over list
    //logic to download files
    //cb(true)
}
//parent.js
var child_process=require('child_process');
var numchild=require('os').cpus().length;
var done=0;
var filesListJSON=[{
“文件名”:“祝福的标题.jpg”,
“url”:”https://i.imgur.com/FRDibHa.jpg",
“下载ID”:“6r44r4k340rvvr”
}, {
“文件名”:“祝福的标题2.jpg”,
“url”:”https://i.imgur.com/FRDibHa2.jpg",
“下载ID”:“6r44r4k340rvvr”
}, {
“文件名”:“祝福的标题3.jpg”,
“url”:”https://i.imgur.com/FRDibHa3.jpg",
“下载ID”:“6r44r4k340rvvr”
}];
//将阵列拆分为可用的并行线程数
var chunks=u.chunk(filelistJSON,numchild);
对于(变量i=0;i
您可以使用 setInterval,这样页面就不会挂起。