Javascript Puppeteer:从表格中抓取数据(字符串数组?)

Javascript Puppeteer:从表格中抓取数据(字符串数组?),javascript,node.js,web-scraping,puppeteer,Javascript,Node.js,Web Scraping,Puppeteer,我试图从一个表中提取数据,我可以提取updateDate,但是我遇到了列和行的问题 我正在尝试抓取的表嵌套在id为col2的td中 我的问题: // Find Table Rows console.log('Searching for COVID-19 Data from Orange County'); // Table Rows let tableRows = await page.$$('#col2 > div > table > tbody > tr'); //

我试图从一个表中提取数据,我可以提取
updateDate
,但是我遇到了列和行的问题

我正在尝试抓取的表格嵌套在 id 为
col2
的 td 中

我的问题:

// Scrape the Orange County COVID-19 table nested inside the element with id "col2".
// Assumes `page` is an already-navigated Puppeteer Page and that this snippet
// runs inside an async scope (confirm against the surrounding code).
console.log('Searching for COVID-19 Data from Orange County');

// Table Rows
const tableRows = await page.$$('#col2 > div > table > tbody > tr');

// Check For Table Rows
if (tableRows.length > 0) {
  console.log('Table Rows found');

  // Update Date (Length: 10)
  // $$() resolves to an array, which is truthy even when empty, so verify the
  // row exists and has cells before calling $eval (which throws on no match).
  const updateRow = tableRows[2];
  if (updateRow && (await updateRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const updateField = String(await updateRow.$eval('tr > td', td => td.innerText.trim()));

    // Capture the date itself instead of slicing at a fixed character offset
    const dateMatch = updateField.match(/as of ([0-9][0-9]\/[0-9][0-9]\/[0-9][0-9][0-9][0-9])/);
    if (dateMatch === null) {
      throw new Error('Error: Update Date doesn\'t match format');
    }
    const updateDate = dateMatch[1];
    console.log(`Update Date: ${updateDate}`);
  }

  // Cases
  const casesRow = tableRows[5];
  if (casesRow && (await casesRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const totalCasesField = String(await casesRow.$eval('tr > td', td => td.innerText.trim()));
    console.log(totalCasesField);
  }
}
我似乎不知道如何正确地查询各行,以便获取所有的数字数据(每行一个字符串数组)

表格(来自浏览器检查器):

// Scrape the Orange County COVID-19 table nested inside the element with id "col2".
// Assumes `page` is an already-navigated Puppeteer Page and that this snippet
// runs inside an async scope (confirm against the surrounding code).
console.log('Searching for COVID-19 Data from Orange County');

// Table Rows
const tableRows = await page.$$('#col2 > div > table > tbody > tr');

// Check For Table Rows
if (tableRows.length > 0) {
  console.log('Table Rows found');

  // Update Date (Length: 10)
  // $$() resolves to an array, which is truthy even when empty, so verify the
  // row exists and has cells before calling $eval (which throws on no match).
  const updateRow = tableRows[2];
  if (updateRow && (await updateRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const updateField = String(await updateRow.$eval('tr > td', td => td.innerText.trim()));

    // Capture the date itself instead of slicing at a fixed character offset
    const dateMatch = updateField.match(/as of ([0-9][0-9]\/[0-9][0-9]\/[0-9][0-9][0-9][0-9])/);
    if (dateMatch === null) {
      throw new Error('Error: Update Date doesn\'t match format');
    }
    const updateDate = dateMatch[1];
    console.log(`Update Date: ${updateDate}`);
  }

  // Cases
  const casesRow = tableRows[5];
  if (casesRow && (await casesRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const totalCasesField = String(await casesRow.$eval('tr > td', td => td.innerText.trim()));
    console.log(totalCasesField);
  }
}

我的代码:

// Scrape the Orange County COVID-19 table nested inside the element with id "col2".
// Assumes `page` is an already-navigated Puppeteer Page and that this snippet
// runs inside an async scope (confirm against the surrounding code).
console.log('Searching for COVID-19 Data from Orange County');

// Table Rows
const tableRows = await page.$$('#col2 > div > table > tbody > tr');

// Check For Table Rows
if (tableRows.length > 0) {
  console.log('Table Rows found');

  // Update Date (Length: 10)
  // $$() resolves to an array, which is truthy even when empty, so verify the
  // row exists and has cells before calling $eval (which throws on no match).
  const updateRow = tableRows[2];
  if (updateRow && (await updateRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const updateField = String(await updateRow.$eval('tr > td', td => td.innerText.trim()));

    // Capture the date itself instead of slicing at a fixed character offset
    const dateMatch = updateField.match(/as of ([0-9][0-9]\/[0-9][0-9]\/[0-9][0-9][0-9][0-9])/);
    if (dateMatch === null) {
      throw new Error('Error: Update Date doesn\'t match format');
    }
    const updateDate = dateMatch[1];
    console.log(`Update Date: ${updateDate}`);
  }

  // Cases
  const casesRow = tableRows[5];
  if (casesRow && (await casesRow.$$('tr > td')).length > 0) {
    // Assign Element (First Cell)
    const totalCasesField = String(await casesRow.$eval('tr > td', td => td.innerText.trim()));
    console.log(totalCasesField);
  }
}
可以像这样做:

const puppeteer = require('puppeteer');

/**
 * Launches a browser, opens the Orange County coronavirus page and prints the
 * numeric cells of the data table as an array of string arrays (one per row).
 */
(async function main() {
  let browser;
  try {
    browser = await puppeteer.launch();
    // Reuse the tab the browser opens on launch instead of creating a new page
    const [page] = await browser.pages();

    await page.goto('https://www.ochealthinfo.com/phs/about/epidasmt/epi/dip/prevention/novel_coronavirus');

    // Runs in the page context; only serializable values can be returned
    const data = await page.evaluate(() => {
      // "table + table" selects a table immediately preceded by a sibling table
      const table = document.querySelector('#col2 > div > table + table');
      // Keep rows at indices 3..8
      const rowsWithNumbers = [...table.rows].slice(3, 9);
      // Drop the first cell of each row and keep the text of the remaining cells
      const numbers = rowsWithNumbers.map(
        row => [...row.cells].slice(1).map(cell => cell.innerText)
      );
      return numbers;
    });
    console.log(data);
  } catch (err) {
    console.error(err);
  } finally {
    // Close the browser even when navigation or evaluation fails,
    // otherwise the browser process is left running
    if (browser) {
      await browser.close();
    }
  }
})();
结果:

[
  ['42', '26', '16', '0',  '21', '13', '8'],
  ['22', '13', '9', '0',  '10', '8', '4'],
  ['7', '6', '1', '0', '5', '2', '0'],
  ['12', '7', '5', '0',  '5', '3', '4'],
  ['1', '0', '1', '0', '1', '0', '0'],
  ['0', '0', '0', '0', '0', '0', '0']
]