Javascript 我如何解决这个分页网页与木偶刮?正确地定位节点,但在for循环中单击它会不断抛出错误

Javascript 我如何解决这个分页网页与木偶刮?正确地定位节点,但在for循环中单击它会不断抛出错误,javascript,node.js,web-scraping,puppeteer,Javascript,Node.js,Web Scraping,Puppeteer,因此,我试图从一个网站(这是一个有公共信息的网站)上搜集一些基本的公司数据。我正在使用node和puppeter来实现这一点。下面的工作代码成功地抓取了第一页,但是当点击第二页时,我得到了错误:执行上下文被破坏,很可能是因为导航。,现在我得到一个错误,说我的函数不是函数 有谁能指出我做错了什么,以及刮去所有28页的最佳方法是什么 成功地刮取第一页 const puppeteer = require("puppeteer"); // var fs = require("fs"); const fs

因此,我试图从一个网站(这是一个有公共信息的网站)上搜集一些基本的公司数据。我正在使用node和puppeter来实现这一点。下面的工作代码成功地抓取了第一页,但是当点击第二页时,我得到了
错误:执行上下文被破坏,很可能是因为导航。
,现在我得到一个错误,说我的函数不是函数

有谁能指出我做错了什么,以及刮去所有28页的最佳方法是什么

成功地刮取第一页

const puppeteer = require("puppeteer");
// var fs = require("fs");
const fsp = require("fs").promises;
const fs = require("fs");

let pageCount = 1; // 21 full pages of content
let companyRows;
function delay(time) {
  return new Promise(function(resolve) {
    setTimeout(resolve, time);
  });
}

(async () => {
  try {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    page.on("console", msg => {
      for (let i = 0; i < msg.args().length; ++i)
        console.log(`${i}: ${msg.args()[i]}`);
    });

    await page.goto(
      "http://dpsstnet.state.or.us/IRIS_PublicInquiry/PrivateSecurity/SMSAgcyTable.aspx"
    );

    //Clicks a tag by id

    await page.click("#btnNaLL");

    await page.waitFor(1000);

    const result = await page.evaluate(() => {
      let row = document.querySelectorAll("tr");
      let companyData = [];

      row.forEach(el => {
        let company = {};
        let count = 0;
        for (data of el.cells) {
          switch (count) {
            case 0:
              company.name = data.innerText.trim();
            case 1:
              company.primaryContact = data.innerText.trim();
            case 2:
              company.address = data.innerText.trim();
            case 3:
              company.phone = data.innerText.trim();
            case 4:
              company.county = data.innerText.trim();
            case 5:
              company.status = data.innerText.trim();
            default:
              company.default = data.innerText.trim();
          }
          count++;
          companyData.push(company);
          //GOT SOME STUUFFFF
          console.log(JSON.stringify(companyData));
        }
      });

      // await page.waitFor(3000);
      // await fsp.writeFile("./json/file.json", result.stringify());
      companyData = companyData.filter((a, b) => companyData.indexOf(a) === b);
      companyData = companyData.filter(e => e.status === "Active");
      return companyData;
    });

    // fsp.writeFile(
    //   "./json/file.json",
    //   JSON.stringify(companyData, null, 2),
    //   err =>
    //     err
    //       ? console.error("Data not written!", err)
    //       : console.log("Data Written")
    // );

    await fsp.writeFile(
      "./json/file.json",
      JSON.stringify(result, null, 2),
      err =>
        err
          ? console.error("Data not written!", err)
          : console.log("Data Written")
    );
    await page.screenshot({
      path: "./screenshots/page1.png"
    });
    await page.pdf({ path: "./pdfs/page1.pdf" });
    await browser.close();
    return result;
  } catch (error) {
    console.log(error);
  }
})();

const puppeter=require(“木偶演员”);
//var fs=要求(“fs”);
const fsp=要求(“fs”)承诺;
常数fs=要求(“fs”);
让pageCount=1;//21整页内容
让同伴来;
功能延迟(时间){
返回新承诺(函数(解析){
setTimeout(解析,时间);
});
}
(异步()=>{
试一试{
const browser=wait puppeter.launch();
const page=wait browser.newPage();
第页(“控制台”,msg=>{
for(设i=0;i{
let row=document.querySelectorAll(“tr”);
让companyData=[];
row.forEach(el=>{
让公司={};
让计数=0;
用于(el.单元格的数据){
开关(计数){
案例0:
company.name=data.innerText.trim();
案例1:
company.primaryContact=data.innerText.trim();
案例2:
company.address=data.innerText.trim();
案例3:
company.phone=data.innerText.trim();
案例4:
company.county=data.innerText.trim();
案例5:
company.status=data.innerText.trim();
违约:
company.default=data.innerText.trim();
}
计数++;
companyData.push(公司);
//有一些笨蛋
log(JSON.stringify(companyData));
}
});
//等待页面。等待(3000);
//等待fsp.writeFile(“./json/file.json”,result.stringify());
companyData=companyData.filter((a,b)=>companyData.indexOf(a)==b);
companyData=companyData.filter(e=>e.status==“活动”);
返回公司数据;
});
//写文件(
//“/json/file.json”,
//stringify(companyData,null,2),
//错误=>
//错误
//?控制台错误(“数据未写入!”,错误)
//:console.log(“数据写入”)
// );
等待fsp.writeFile(
“/json/file.json”,
stringify(结果,null,2),
错误=>
犯错误
?控制台错误(“数据未写入!”,错误)
:console.log(“数据写入”)
);
等待页面。屏幕截图({
路径:“./screenshots/page1.png”
});
等待page.pdf({路径:“./pdfs/page1.pdf”});
等待浏览器关闭();
返回结果;
}捕获(错误){
console.log(错误);
}
})();
重写代码以浏览页面(不工作) 目前,我在运行此程序时得到“clickLink不是一个函数”

const puppeteer = require("puppeteer");
const fsp = require("fs").promises;
const fs = require("fs");

let pageCount = 1; // 21 full pages of content
let companyRows;
let pageToClick;
function delay(time) {
  return new Promise(function(resolve) {
    setTimeout(resolve, time);
  });
}

(async () => {
  try {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    const clickLink = link => {
      page.click(link);
      page.waitFor(1000);
    };
    page.on("console", msg => {
      for (let i = 0; i < msg.args().length; ++i)
        console.log(`${i}: ${msg.args()[i]}`);
    });

    await page.goto(
      "http://dpsstnet.state.or.us/IRIS_PublicInquiry/PrivateSecurity/SMSAgcyTable.aspx"
    );

    //Clicks a tag by id

    await page.click("#btnNaLL");

    await page.waitFor(1000);
    let fullResult = [];
    let result;

    result = await page.evaluate(
      (fullResult, clickLink => {
        let row = document.querySelectorAll("tr");
        let companyData = [];
        let pageList = document.querySelectorAll("b > a");

        for (let step = 0; step < 2; step++) {
          row.forEach(el => {
            let company = {};
            let count = 0;
            for (data of el.cells) {
              switch (count) {
                case 0:
                  company.name = data.innerText.trim();
                case 1:
                  company.primaryContact = data.innerText.trim();
                case 2:
                  company.address = data.innerText.trim();
                case 3:
                  company.phone = data.innerText.trim();
                case 4:
                  company.county = data.innerText.trim();
                case 5:
                  company.status = data.innerText.trim();
                default:
                  company.default = data.innerText.trim();
              }
              count++;
              companyData.push(company);
              //GOT SOME STUUFFFF
              console.log(JSON.stringify(companyData));
            }
          });

          companyData = companyData.filter(
            (a, b) => companyData.indexOf(a) === b
          );
          companyData = companyData.filter(e => e.status === "Active");
          fullResult = [...fullResult, ...companyData];
          // console.log(JSON.stringify(pageList[step].innerText));
          clickLink(pageList[step]);
        }
        return fullResult;
      },
      fullResult,
      clickLink
    );

    await fsp.writeFile(
      "./json/file.json",
      JSON.stringify(result, null, 2),
      err =>
        err
          ? console.error("Data not written!", err)
          : console.log("Data Written")
    );
    //*
    await page.screenshot({
      path: "./screenshots/page1.png"
    });
    await page.pdf({ path: "./pdfs/page1.pdf" });
    await browser.close();
    return result;
  } catch (error) {
    console.log(error);
  }
})();

const puppeter=require(“木偶演员”);
const fsp=要求(“fs”)承诺;
常数fs=要求(“fs”);
让pageCount=1;//21整页内容
让同伴来;
让pageToClick;
功能延迟(时间){
返回新承诺(函数(解析){
setTimeout(解析,时间);
});
}
(异步()=>{
试一试{
const browser=wait puppeter.launch();
const page=wait browser.newPage();
const clickLink=link=>{
页面。单击(链接);
第页等待(1000);
};
第页(“控制台”,msg=>{
for(设i=0;i{
let row=document.querySelectorAll(“tr”);
让companyData=[];
让pageList=document.querySelectorAll(“b>a”);
对于(步骤=0;步骤<2;步骤++){
row.forEach(el=>{
让公司={};
让计数=0;
用于(el.单元格的数据){
开关(计数){
案例0:
company.name=data.innerText.trim();
案例1:
company.primaryContact=data.innerText.trim();
案例2:
company.address=data.innerText.trim();
案例3:
company.phone=data.innerText.trim();
案例4:
company.county=data.innerText.trim();
案例5:
company.status=data.innerText.trim();
违约:
company.default=data.innerText.trim();
}
计数++;
companyData.push(公司);
//有一些笨蛋
log(JSON.stringify(companyData));
}
});
companyData=companyData.filter(
(a,b)=>companyData.indexOf(a)==b
);
companyData=companyData.filter(e=>e.status==“活动”);
fullResult=[…fullResult,…companyData];
//log(JSON.stringify(pageList[step].innerText));
单击链接(页面列表[步骤]);
}
返回完整结果;
},