如何等待 JavaScript 网页抓取函数执行完毕后,再处理下一个页面?

在运行下一页之前,是否等待Javascript Web抓取功能完成?,javascript,jquery,json,node.js,Javascript,Jquery,Json,Node.js,我正在尝试创建一个web scraper(在node.js中),它将从站点中提取信息,并将其写入文件。我已经构建了一个页面可以正常工作,但是当我尝试在for循环中使用这个函数,迭代多个游戏时,我在所有游戏中都得到了错误的数据 我知道这与Javascript的异步性质有关,我也读过回调函数,但我不知道如何将其应用到我的代码中。如有任何帮助,将不胜感激: for(x = 4648; x < 4650; x++){ //iterate over a few gameIDs, used in U

我正在尝试(用 node.js)编写一个网页抓取器(web scraper),它从某个站点提取信息并将其写入文件。针对单个页面的抓取函数已经可以正常工作,但是当我在 for 循环中调用这个函数、依次处理多场游戏时,所有游戏得到的数据都是错误的。

我知道这与Javascript的异步性质有关,我也读过回调函数,但我不知道如何将其应用到我的代码中。如有任何帮助,将不胜感激:

// Iterate over a few gameIDs, used in URL for request.
// The counter is declared with `var`: assigning to an undeclared `x` creates
// an implicit global in sloppy mode and throws a ReferenceError in strict
// mode / ES modules.
// Note that scrapeGame(x) is simply called once per id, back to back; nothing
// in this loop waits for work that scrapeGame starts asynchronously.
for (var x = 4648; x < 4650; x++) {
    scrapeGame(x);
}

/**
 * Placeholder for the per-game scraper.
 *
 * @param {number} gameId - id of the game to scrape (used in the request URL).
 */
function scrapeGame(gameId){
    //request from URL, scrape HTML to arrays as necessary
    //write final array to file
}
//迭代几个游戏ID,在URL中用于请求。
对于(x=4648;x<4650;x++){
//作为匿名函数传入回调。
//下面我将传递id和要执行的函数。
//并且,将我期望的结果定义为传入参数。
ScrapGame(x,函数(ScrapResult,err){
//这将*不会*执行*直到*您在下面的函数中调用它。
//这意味着for循环的执行将暂停。
//此函数接收传入的状态,
//在这种情况下,布尔值为true/false和错误(如果有)。
如果(结果){
//刮是真的,没什么可做的。
//for循环现在将进入下一个迭代。
console.log('Scrape Successful');
}否则{
//Scrape为false,将错误输出到console.log和
//中断循环以处理错误。
log('Scrape ERROR::'+err);
//请注意,我们在通话中呼叫break
//回调函数的作用域
//如果要继续,请删除打断
//下一个游戏ID并没有停止循环
打破
}
});
}
//此函数现在接受两个参数。
游戏功能(gameId,回调){
// ************************************************
//**在这里工作**
//从URL请求,根据需要将HTML刮到数组。
//将最终数组写入文件。
//创建文件后,执行回调并传递bool
//状态(真/假)。
// ************************************************
var request=require('request'),
cheerio=需要('cheerio'),
fs=需要('fs'),
类别=[],
类别列表=[],
ids=[],
线索=[],
值=[
'0',
'$200',
'$400',
'$600',
'$800',
'$1000',
'$400',
'$800',
'$1200',
'$1600',
'$2000'
],
valuelist=[],
答案=[],
数组=[],
文件=[],
showGameURL=http://www.j-archive.com/showgame.php?game_id=“+配子体,
showAnswerURLhttp://www.j-archive.com/showgameresponses.php?game_id=“+配子体;
请求(showGameURL、函数(err、resp、body){
如果(!err&&resp.statusCode==200){
var$=总负荷(车身);
//向类别添加一行以避免从0开始
类别。推送(“类别列表”);
//拉取所有类别以供以后使用
$('td.category_name')。每个(函数(){
var category=$(this.text();
类别。推送(类别);
});
//拉取所有线索ID(坐标),存储到1d数组
//拉动任何“卡在”字符串中的id,以防止重复
$(“[id*='stacked']”)。每个(函数(){
var id=$(this.attr('id');
id=id.toString();
id=id.substring(0,id.length-6);
id.push(id);
//如果是单J,则选择类别1-6
if(id.indexOf(“\u J”)!=-1){
变量catid=id.charAt(7);
categorylist.push(categories[catid]);
var valId=id.charAt(9);
valuelist.push(值[有效]);
}
//如果是双J,则选择类别7-12
else if(id.indexOf(“\u DJ”)!=-1){
var catid=parseInt(id.charAt(8))+6;
categorylist.push(categories[catid]);
var valId=parseInt(id.charAt(10))+5;
valuelist.push(值[有效]);
}
//如果是最终J,选择类别13
否则{
类别列表推送(类别[13]);
}
});
//提取所有线索文本,存储到1d数组
$('td.cule_text')。每个(函数(){
var clue=$(this.text();
线索。推(线索);
});
//将拉取值推送到大数组
array.push(ids);
array.push(categorylist);
array.push(valuelist);
数组。推送(线索);
//对不同URL的新请求以获取响应
请求(showAnswerURL、函数(err、resp、body){
如果(!err&&resp.statusCode==200){
var$=总负荷(车身);
$('.correct_response')。每个(函数(){
var answer=$(this.text();
回答。推(回答);
});
//将答案推送到大数组
数组。推送(应答);
//将数组合并到一维数组中以准备写入文件
对于(var i=0;i …(译文代码在此处被截断)
// Iterate over a few gameIDs, used in URL for request.
/**
 * Scrape game ids gameId..lastGameId (inclusive) strictly one after another.
 *
 * A plain `for` loop cannot wait for a callback: it would start every scrape
 * immediately, and a `break` written inside the callback is a SyntaxError
 * because the callback is a separate function, not part of the loop body.
 * Instead, the next scrape is started from inside the callback of the
 * previous one.
 *
 * @param {number} gameId - id to scrape now.
 * @param {number} lastGameId - last id to scrape (inclusive).
 */
function scrapeGamesInOrder(gameId, lastGameId) {
  if (gameId > lastGameId) {
    // Every requested game has been processed.
    return;
  }

  // Pass in the callback as an anonymous function. It does not run until
  // scrapeGame calls it, and receives a boolean status plus an error, if any.
  scrapeGame(gameId, function(scrapeResult, err) {
    if (!scrapeResult) {
      // Scrape was false: report the error and stop the sequence by not
      // scheduling the next id. To skip a failed game and keep going,
      // remove this `return`.
      console.log('Scrape ERROR :: ' + err);
      return;
    }

    // Scrape was true: move on to the next game id.
    console.log('Scrape Successful');
    scrapeGamesInOrder(gameId + 1, lastGameId);
  });
}

// Same ids the original loop covered (4648 <= id < 4650).
scrapeGamesInOrder(4648, 4649);

// This function now accepts two arguments.
/**
 * Scrape one game and write it to `J_GAME_<gameId>.txt`.
 *
 * Two pages are requested one after the other: the game page (category
 * names, clue ids, clue texts) and the responses page (correct responses).
 * One record per clue id is built as `id|category|value|clue|answer`.
 *
 * The callback is invoked for every outcome handled here, so a caller that
 * chains scrapes never stalls: a request error, a non-200 status, a file
 * write error, or success.
 *
 * @param {number|string} gameId - appended to both URLs as `game_id` and
 *     used in the output file name.
 * @param {function(boolean, *=)} callback - called with `(true)` once the
 *     file has been written, or `(false, err)` on failure.
 */
function scrapeGame(gameId, callback) {

  var request = require('request'),
      cheerio = require('cheerio'),
      fs = require('fs'),
      categories = [],
      categorylist = [],
      ids = [],
      clues = [],
      // Index 0 is a placeholder; 1-5 are used by the "_J_" branch and
      // 6-10 (row + 5) by the "_DJ_" branch below.
      values = [
          '0',
          '$200',
          '$400',
          '$600',
          '$800',
          '$1000',
          '$400',
          '$800',
          '$1200',
          '$1600',
          '$2000'
      ],
      valuelist = [],
      answers = [],
      array = [],
      file = [],
      showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId,
      showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;

  // Reports a failed page fetch through the callback.
  // Returns true when the caller must stop processing the response.
  function fetchFailed(url, err, resp) {
    if (err) {
      callback(false, err);
      return true;
    }
    if (resp.statusCode !== 200) {
      callback(false, new Error('Request for ' + url + ' returned HTTP status ' + resp.statusCode));
      return true;
    }
    return false;
  }

  request(showGameURL, function(err, resp, body) {
    if (fetchFailed(showGameURL, err, resp)) {
      return;
    }

    var $ = cheerio.load(body);
    //add a row to categories to avoid starting at 0
    categories.push('Category List');
    //pull all categories to use for later
    $('td.category_name').each(function() {
      categories.push($(this).text());
    });
    //pull all clue IDs (coordinates), store to 1d array
    //pull any id that has "stuck" in the string, to prevent duplicates
    $("[id*='stuck']").each(function() {
      var id = $(this).attr('id').toString();
      // Drop the last 6 characters (presumably a "_stuck" suffix — verify
      // against the page markup).
      id = id.substring(0, id.length - 6);
      ids.push(id);

      var catid, valId;
      //if single J, pick category 1-6
      if (id.indexOf("_J_") !== -1) {
        catid = parseInt(id.charAt(7), 10);
        categorylist.push(categories[catid]);
        valId = parseInt(id.charAt(9), 10);
        valuelist.push(values[valId]);
      }
      //if double J, pick category 7-12
      else if (id.indexOf("_DJ_") !== -1) {
        catid = parseInt(id.charAt(8), 10) + 6;
        categorylist.push(categories[catid]);
        valId = parseInt(id.charAt(10), 10) + 5;
        valuelist.push(values[valId]);
      }
      //if final J, pick category 13 (no value is pushed for this clue)
      else {
        categorylist.push(categories[13]);
      }
    });
    //pull all clue texts, store to 1d array
    $('td.clue_text').each(function() {
      clues.push($(this).text());
    });
    //push pulled values to big array
    array.push(ids);
    array.push(categorylist);
    array.push(valuelist);
    array.push(clues);

    //new request to different URL to pull responses
    request(showAnswerURL, function(err, resp, body) {
      if (fetchFailed(showAnswerURL, err, resp)) {
        return;
      }

      var $ = cheerio.load(body);

      $('.correct_response').each(function() {
        answers.push($(this).text());
      });
      //push answers to big array
      array.push(answers);
      //combine arrays into 1-d array to prep for writing to file
      for (var i = 0; i < array[0].length; i++) {
        file.push(array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i]);
      }
      //update string, add newlines, etc.
      // NOTE(review): this keeps the JSON array's leading `["` and trailing
      // `"]` in the output and strips every backslash, including those that
      // JSON.stringify added as escapes. Kept as-is so the file format does
      // not change; confirm whether a plain file.join('\n') is wanted.
      var stringFile = JSON.stringify(file);
      stringFile = stringFile.split('\\').join('');
      stringFile = stringFile.split('","').join('\n');
      //write to file, eventually will append to end of one big file
      fs.writeFile('J_GAME_' + gameId + '.txt', stringFile, function(err) {
        // The arrays above are locals of this call, so there is nothing to
        // reset between games.
        if (err) {
          // Callback false with error.
          callback(false, err);
          return;
        }
        console.log("Game #" + gameId + " has been scraped.");
        // Callback true with no error.
        callback(true);
      });
    });
  });
}
    // Scrape ids 4648 and 4649 one after another: each scrape is started
    // from the completion callback of the previous one.
    // scrapeGame is called directly inside the callback; its return value is
    // not a function, so it cannot be stored and re-invoked.
    var x = 4648;
    scrapeGame(x, function cb(){
        x++;
        // Check the bound after incrementing so the last id scraped is 4649,
        // matching the `x < 4650` range of the original loop.
        if(x >= 4650){
           return;
        }
        scrapeGame(x, cb);
    });



/**
 * Placeholder for the original, callback-less scraper.
 *
 * @param {number} gameId - id of the game to scrape (used in the request URL).
 */
function scrapeGame(gameId){
    // Fetch the page for this game by URL and collect the scraped HTML
    // into arrays as needed.
    // Write the resulting array out to a file.
}
/**
 * Sketch of a scraper that reports completion through a callback.
 *
 * `http` and `options` are not defined in this snippet; they are expected to
 * be supplied where the placeholder comment below indicates.
 *
 * @param {number} gameId - id of the game to scrape.
 * @param {function(Error=)} cb - called once: with no arguments after the
 *     response has been fully read, or with the error if the request fails.
 *     Callers that ignore the argument simply continue with the next game.
 */
function scrapeGame(gameId, cb){

//your code and set options

// Guards against calling cb twice if an error arrives after the response ended.
var finished = false;
function finish(err){
    if (finished) {
        return;
    }
    finished = true;
    if (err) {
        cb(err);
    } else {
        cb();
    }
}

var req = http.request(options, function(response){
    var result = "";
    response.on('data', function (chunk) {
        result += chunk;
    });
    response.on('end', function(){
        //write data here;

        //do the callback
        finish();
    });
});

// Without an 'error' handler a failed request would never reach cb.
req.on('error', finish);

// http.request only sends the request once end() is called.
req.end();

}