Javascript 尝试使用casperjs遍历某些链接
标签:javascript, web-scraping, casperjs
这就是我目前拥有的:
// Script setup: output path, the single casper instance, and the shared state
// used by the scraping steps further down.
var fs = require('fs');
var folderName = 'CARD_DATA';
var fileName = 'allChampionDecks.txt';
// Output file: <working directory>/CARD_DATA/allChampionDecks.txt
var save = fs.pathJoin(fs.workingDirectory, folderName, fileName);
// init jquery
// Only one casper instance is created; the earlier bare create() call produced
// an instance that was immediately replaced and never used.
var casper = require('casper').create({
    clientScripts: ['jquery.min.js']
});
// No initial URL: every page is opened later through thenOpen(). The original
// passed an identifier (`URL`) that this script never defines.
casper.start();
var links = ["http://magic.wizards.com/en/events/coverage/mtgochamp14","http://magic.wizards.com/node/335986","http://magic.wizards.com/en/events/coverage/2014WC"];
// Index into `links`, advanced by the iteration step before each page is queued.
var i = -1;
// Accumulates the text returned by getLinkDeckData for every visited page.
var linkData = '';
// iterate
// NOTE: this step only *schedules* the page visits; see the comment on
// fs.write below for why the output file ends up without any scraped data.
casper.then(function() {
// Walk over `links`; the callback ignores the arguments each() passes and
// indexes `links` through the global counter `i` instead.
this.each(links, function() {
i++;
// thenOpen() queues an asynchronous step: the page is opened and the
// callback below runs later, not during this loop.
this.thenOpen((links[i]), function() {
// Run getLinkDeckData inside the loaded page and append what it returns.
linkData += this.evaluate(getLinkDeckData);
});
});
// Runs immediately, still inside the current step and before any of the
// queued thenOpen() callbacks have executed, so `linkData` is still '' here.
// Wrapping this call in its own this.then(...) step defers it until after them.
fs.write(save, linkData + '\n', 'w');
});
// scrape
/**
 * Scrapes every deck list on the current page and serialises it as JSON text.
 *
 * This function is handed to casper.evaluate(), so it must be self-contained
 * (no references to helpers outside its own body) and it relies on the `$`
 * (jQuery) that is injected into the page.
 *
 * Every scraped string goes through JSON.stringify so that quotes, backslashes
 * and line breaks in event, deck or card names can no longer corrupt the output.
 * For text without such characters the output is byte-for-byte what the
 * hand-built concatenation used to produce.
 *
 * NOTE(review): the layout is kept for backward compatibility - all decks are
 * written into ONE object, so the keys "event", "deckName", "deck" and
 * "sideboard" repeat once per deck. Consumers that parse the text will only
 * see the last deck; consider switching to an array of deck objects.
 *
 * @returns {string} JSON text for all decks on the page; '{}' when the page
 *     has no deck headings (previously an unterminated '{').
 */
function getLinkDeckData() {
    // Serialises the card rows of one container as a comma-separated list of
    // {"quantity", "name"} objects (without the surrounding brackets).
    function cardsToJson(deck, containerSelector) {
        var counts = $(deck).find(containerSelector + ' .row .card-count');
        var names = $(deck).find(containerSelector + ' .row .card-name');
        var cards = [];
        for (var j = 0; j < counts.length; j++) {
            cards.push('{"quantity":' + JSON.stringify($(counts[j]).text())
                + ', "name":' + JSON.stringify($(names[j]).text()) + '}');
        }
        return cards.join(',');
    }

    var meta = $('.deck-meta h4');
    var event = $('.deck-meta h5');
    var allDecks = $('.toggle-text .deck-list-text');
    var decks = [];
    // The number of deck-name headings drives the loop; the event heading and
    // deck list at the same index are assumed to belong to the same deck.
    for (var i = 0; i < meta.length; i++) {
        decks.push('"event": ' + JSON.stringify($(event[i]).text().trim()) + ','
            + '"deckName": ' + JSON.stringify($(meta[i]).text()) + ','
            + '"deck": [' + cardsToJson(allDecks[i], '.sorted-by-overview-container') + '],'
            + '"sideboard": [' + cardsToJson(allDecks[i], '.sorted-by-sideboard-container') + ']');
    }
    return '{' + decks.join(',') + '}';
}
// Start executing the steps that were queued above.
casper.run();
所有 then*(以及 wait*)函数都是异步的步骤函数:它们只是把步骤加入队列,稍后才会执行。当您在循环内调用 casper.thenOpen() 时,您安排了一个 open(打开页面)步骤,其附带的 then 回调则作为另一个单独的步骤。
问题在于,写入 linkData 的代码并不在单独的步骤中,因此它会在这些已安排的步骤运行之前执行。只需将其包装在 casper.then() 中即可。
修正后的代码片段:
// Queue one open step per link, then a final step that writes the collected data.
casper.then(function() {
    // Array.prototype.forEach takes (callback, thisArg): the callback comes
    // first, and passing `this` as thisArg keeps the casper instance available
    // as `this` inside the callback.
    links.forEach(function(link) {
        this.thenOpen(link, function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    }, this);
    // Scheduled after all thenOpen() steps above, so linkData is complete
    // by the time the file is written.
    this.then(function(){
        fs.write(save, linkData + '\n', 'w');
    });
});
您应该使用 Array.prototype.forEach,而不是 CasperJS 的 each,这样就不需要全局计数器变量。
我在循环上也遇到过一个问题:then() 或 thenEvaluate() 在循环中不起作用:
// Queue one open step per link, then a final step that writes the collected data.
casper.then(function() {
    // forEach expects the callback as its first argument; the array itself was
    // being passed there, which throws a TypeError. `this` is supplied as
    // thisArg so the callback can still call this.thenOpen().
    links.forEach(function(link) {
        this.thenOpen(link, function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    }, this);
    // Runs only after every queued thenOpen() step has finished.
    this.then(function(){
        fs.write(save, linkData + '\n', 'w');
    });
});