Javascript PhantomJS抓取多个页面
我试图让 PhantomJS 使用 while 循环抓取多个页面，但我发现它是异步执行的，最终只返回最后一个页面的结果。有没有办法让它返回每个页面的结果？（标签：javascript、asynchronous、phantomjs、web-crawler）下面是我的代码：
// Question snippet: reads file.csv one line at a time and calls page.open()
// on each line, logging the cookies after every successful load.
// page.open() only registers a callback; the while loop keeps reading lines
// without waiting for it, which is the behaviour the question asks about.
var _output = {'cookies':[],'resources':{'js':[]}};
var fs = require('fs');
var file_h = fs.open('file.csv', 'r');
var line = file_h.readLine();
// The loop ends as soon as readLine() returns a falsy value.
while(line)
{
// `var` is function-scoped, so `page`, `system` and `address` are single
// variables reassigned on every iteration rather than fresh per-iteration ones.
var page = require('webpage').create(),
system = require('system'),
address;
console.log(line);
// open web page
phantom.cookiesEnabled = true;
address = line;
page.open(address, function (status) {
console.log("status " + status);
if(status=='success'){
// Every successful callback overwrites the same _output.cookies field, so
// only the most recent assignment is kept.
_output.cookies = phantom.cookies; // record cookies
_output.cookies.forEach(function(cookie){
console.log("cookie " + cookie);
});
}else{
// `address` is the shared variable declared above: this message shows whatever
// value the loop assigned last at the time the callback runs.
console.log('Unable to open provided URL: '+address);
phantom.exit(-2); // -2: unable to open provided URL
}
});
// to avoid errors detected while parsing the page (eg. Syntax Error, Type Error, etc.)
// getting into stdout, so breaking the JSON decoding of returned output.
// The handler body is intentionally empty.
page.onError = function (msg, trace) {
}
line = file_h.readLine();
}
// Reached once the loop has read every line; nothing here waits for the
// page.open callbacks, and this snippet calls phantom.exit only on failure.
file_h.close();
对异步任务使用 Promise：
// Promise-based scrape of every URL listed in file.csv (one URL per line).
// Each URL is loaded in its own page object and yields its own result, so the
// final handler receives one entry per page instead of only the last one.
//
// Written in ES5 syntax (function expressions, no arrow functions) because
// PhantomJS reported "SyntaxError: Parse error" on the arrow-function version.
// NOTE(review): assumes a global Promise exists in the PhantomJS build in use;
// older builds may need a Promise polyfill loaded first -- confirm.
var webpage = require('webpage'),
system = require('system'),
fs = require('fs');

phantom.cookiesEnabled = true;

/**
 * Reads the URLs to visit from a text file, one per line.
 * Reading stops at end of file or at the first empty line (readLine()
 * returning a falsy value), matching the original while(line) loop.
 *
 * @param {string} path - path of the file to read
 * @returns {string[]} the lines read, in file order
 */
function read_addresses(path) {
  var file_h = fs.open(path, 'r');
  var addresses = [];
  try {
    var line = file_h.readLine();
    while (line) {
      addresses.push(line);
      // Advance in the loop itself; doing this inside process_line only
      // changed that function's own parameter and the loop never ended.
      line = file_h.readLine();
    }
  } finally {
    // Release the handle even if reading throws.
    file_h.close();
  }
  return addresses;
}

/**
 * Opens one URL in a dedicated page and reports the outcome.
 *
 * @param {string} line - the URL to open
 * @returns {Promise} resolves with {address, status, cookies} when the page
 *   loads; rejects with an Error when the URL cannot be opened.
 *   `cookies` is a snapshot of phantom.cookies (the process-wide cookie jar)
 *   taken when this page finished loading.
 */
function process_line(line) {
  return new Promise(function (resolve, reject) {
    var address = line;
    // A separate page per URL: a single shared page cannot serve several
    // concurrent open() calls.
    var page = webpage.create();
    console.log(address);

    // to avoid errors detected while parsing the page (eg. Syntax Error, Type Error, etc.)
    // getting into stdout, so breaking the JSON decoding of returned output.
    // Deliberately a no-op: a script error inside the page is not a load
    // failure and must not abort the whole batch. Installed before open() so
    // that errors raised during loading are covered too.
    page.onError = function (msg, trace) {
    };

    // open web page
    page.open(address, function (status) {
      console.log("status " + status);
      if (status !== 'success') {
        page.close();
        reject(new Error('Unable to open provided URL: ' + address));
        return;
      }
      var cookies = phantom.cookies; // record cookies
      cookies.forEach(function (cookie) {
        console.log("cookie " + cookie);
      });
      page.close();
      resolve({ address: address, status: status, cookies: cookies });
    });
  });
}

// Reading the file inside a promise routes a missing/unreadable file.csv to
// the same catch handler as a failed page load.
new Promise(function (resolve) {
  resolve(read_addresses('file.csv'));
})
.then(function (addresses) {
  return Promise.all(addresses.map(function (address) {
    return process_line(address);
  }));
})
.then(function (results) {
  // use the results: one {address, status, cookies} entry per URL, in file order
  // PhantomJS keeps running until told to stop, so exit explicitly on success.
  phantom.exit(0);
})
.catch(function (err) {
  // report an error
  system.stderr.writeLine(String(err));
  phantom.exit(-2); // -2: unable to open provided URL
});
评论：谢谢，但我认为 Promise 不适用于 PhantomJS。尝试这段代码时，PhantomJS 抛出错误“SyntaxError: Parse error”并挂起。您是用哪个版本测试这段代码的？
回复：我没有检查语法错误，我只是在演示它的工作原理。
评论：可能与已有的问题重复（原文中的链接已丢失）。（提问前请不要忘记先搜索。）