为 Puppeteer 改编 web scraper JavaScript 代码

为 Puppeteer 改编 web scraper JavaScript 代码,javascript,Javascript,我几乎没有编码知识,我正在尝试改编一些教程,但没有成功 我希望改编的JavaScript代码(脚本A)粘贴到Chrome开发者控制台中即可运行,并成功地提取了我需要的数据。此JavaScript代码段用于找出电子商务站点中字号最大的价格数字 第二个教程(脚本B)从shell中运行,并调用Puppeteer库。此脚本提取一些酒店预订数据并成功运行 我希望使用Puppeteer库使脚本A从shell运行 这是脚本A- let elements = [ ...document.querySelectorAll('

我几乎没有编码知识,我正在尝试改编一些教程,但没有成功

我希望改编的JavaScript代码(脚本A)粘贴到Chrome开发者控制台中即可运行,并成功地提取了我需要的数据。这段JavaScript代码用于找出电子商务站点页面上字号最大的价格数字

第二个教程(脚本B)从shell中运行,并调用Puppeteer库。此脚本提取一些酒店预订数据并成功运行

我希望使用Puppeteer库使脚本A能够从shell运行

这是脚本A-

// Snapshot every element nested under <body> into a plain array so that
// array helpers such as map() and filter() can be used on it.
let elements = Array.from(document.querySelectorAll(' body *'));

/**
 * Summarises a DOM element as a plain record.
 *
 * The record always carries the element's trimmed text and the x / y of its
 * bounding client rect. `fontSize` (the computed font size parsed to an
 * integer) is only attached when the text is at most 30 characters long and
 * the rect is not located at (0, 0).
 *
 * @param {Element} element - Element to describe.
 * @returns {{fontSize?: number, y: number, x: number, text: string}} The record.
 */
function createRecordFromElement(element) {
  const label = element.textContent.trim();
  const rect = element.getBoundingClientRect();

  const isShortText = label.length <= 30;
  const sitsAtOrigin = rect.x == 0 && rect.y == 0;

  // Elements that fail either check get no fontSize key at all, which is
  // what later filtering relies on to discard them.
  const sizePart = isShortText && !sitsAtOrigin
    ? { fontSize: parseInt(getComputedStyle(element)['fontSize']) }
    : {};

  return { ...sizePart, y: rect.y, x: rect.x, text: label };
}
// Build one record (text, position and, when eligible, font size) per element.
let records = elements.map((element) => createRecordFromElement(element));

/**
 * Decides whether a record looks like a displayed price.
 *
 * A record qualifies when its `y` is not greater than 600, it has a
 * `fontSize`, and its whole text matches the price pattern below (optional
 * "US " prefix, optional currency marker, digits/commas with an optional
 * decimal part, optional "AED" suffix).
 *
 * @param {{y: number, fontSize?: number, text: string}} record - Candidate record.
 * @returns {boolean} true when the record may be a price, false otherwise.
 */
function canBePrice(record) {
  const pricePattern = /(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/;

  // Guard clauses, checked in the same order as the conditions are listed above.
  if (record['y'] > 600) {
    return false;
  }
  if (record['fontSize'] == undefined) {
    return false;
  }
  return record['text'].match(pricePattern) !== null;
}

// Keep only the records that look like a price.
let possiblePriceRecords = records.filter(canBePrice);

// Order the candidates by font size (largest first); ties are broken by
// vertical position (smaller y first).
// Array.prototype.sort expects the comparator to return a negative, zero or
// positive NUMBER. Returning a boolean (a > b) never yields a negative value,
// so the resulting order is not reliable.
let priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
  if (a['fontSize'] !== b['fontSize']) {
    return b['fontSize'] - a['fontSize'];
  }
  return a['y'] - b['y'];
});

// Print the two best candidates. Fewer than two may exist (for example when
// the page shows no price), so do not index blindly into the array.
if (priceRecordsSortedByFontSize.length === 0) {
  console.log('No price candidates found');
}
priceRecordsSortedByFontSize.slice(0, 2).forEach(function(record) {
  console.log(record['text']);
});
我曾多次尝试将脚本A改编成脚本B的格式。出现了各种各样的错误。如果没有编码知识,我将一事无成

这是我尝试过的众多变体之一,称为脚本C-

const puppeteer = require('puppeteer-core');

let bookingUrl = 'https://shop.coles.com.au/a/dianella/product/moccona-coffee-capsules-espresso-7';

/**
 * Opens bookingUrl in headless Chromium, looks for the price-like text
 * rendered with the largest font size, and prints it to the shell.
 */
(async () => {
    const browser = await puppeteer.launch({
        executablePath: '/usr/bin/chromium-browser',
        headless: true
    });

    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 926 });
        await page.goto(bookingUrl);

        // The callback below runs inside the page, not in Node. A console.log
        // in there goes to the browser console, so the result has to be
        // RETURNED from the callback to become visible in the shell.
        const priceText = await page.evaluate(() => {
            const elements = [...document.querySelectorAll(' body *')];

            // Describe an element by its trimmed text and position; fontSize is
            // only recorded for short texts that are not placed at (0, 0).
            function createRecordFromElement(element) {
                const text = element.textContent.trim();
                const record = {};
                const bBox = element.getBoundingClientRect();

                if (text.length <= 30 && !(bBox.x == 0 && bBox.y == 0)) {
                    record['fontSize'] = parseInt(getComputedStyle(element)['fontSize'], 10);
                }
                record['y'] = bBox.y;
                record['x'] = bBox.x;
                record['text'] = text;
                return record;
            }

            // A price candidate sits in the top 600px, has a font size and its
            // whole text matches the price pattern.
            function canBePrice(record) {
                if (record['y'] > 600 ||
                    record['fontSize'] == undefined ||
                    !record['text'].match(/(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/)
                ) {
                    return false;
                }
                return true;
            }

            const possiblePriceRecords = elements.map(createRecordFromElement).filter(canBePrice);

            // The comparator must return a number (negative / zero / positive),
            // not a boolean: largest font first, ties broken by smaller y.
            const priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
                if (a['fontSize'] !== b['fontSize']) {
                    return b['fontSize'] - a['fontSize'];
                }
                return a['y'] - b['y'];
            });

            // null signals "nothing found", e.g. when the site served a
            // bot-blocking page instead of the product page.
            return priceRecordsSortedByFontSize.length > 0
                ? priceRecordsSortedByFontSize[0]['text']
                : null;
        });

        if (priceText === null) {
            console.log('No price candidates found');
        } else {
            console.log(priceText);
        }
    } finally {
        // Always shut Chromium down, otherwise the Node process never exits.
        await browser.close();
    }
})().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
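const puppeteer = require('puppeteer-core');

let bookingUrl = 'https://shop.coles.com.au/a/dianella/product/moccona-coffee-capsules-espresso-7';
(async () => {
    const browser = await puppeteer.launch({
        executablePath: '/usr/bin/chromium-browser',
        headless: true
        });
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 926 });
    await page.goto(bookingUrl);

    // get hotel details
    let hotelData = await page.evaluate(() => {
        let hotels = [];
        // get the hotel elements
        let elements = [
 ...document.querySelectorAll(' body *')
]

function createRecordFromElement(element) {
 const text = element.textContent.trim()
 var record = {}
 const bBox = element.getBoundingClientRect()

if(text.length <= 30 && !(bBox.x == 0 && bBox.y == 0)) {
 record['fontSize'] = parseInt(getComputedStyle(element)['fontSize']) } 
 record['y'] = bBox.y 
 record['x'] = bBox.x 
 record['text'] = text 
 return record 
} 
let records = elements.map(createRecordFromElement) 

function canBePrice(record) { 
 if( record['y'] > 600 ||
  record['fontSize'] == undefined ||
  !record['text'].match(/(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/)
)
 return false
 else return true
}

let possiblePriceRecords = records.filter(canBePrice)
let priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
if (a['fontSize'] == b['fontSize']) return a['y'] > b['y']
return a['fontSize'] < b['fontSize']

})
console.log(priceRecordsSortedByFontSize[0]['text']);
})();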
以下是教程的链接以获取信息-


脚本C中是否有任何明显的错误?

阅读脚本C后,您似乎没有犯任何错误,而您试图访问的网站决定阻止scraper Bot

在该域名上快速查询主机记录表明,他们正在使用安全服务 section.io 来阻止其网站上的爬虫机器人。请参阅:

shop.coles.com.au是shop.coles.com.au.c.section.io的别名。

shop.coles.com.au.c.section.io是shop.coles.com.au.x.section.io的别名

感谢您查看代码。我没想到他们会阻止机器人!
const puppeteer = require('puppeteer-core');

let bookingUrl = 'https://shop.coles.com.au/a/dianella/product/moccona-coffee-capsules-espresso-7';

/**
 * Opens bookingUrl in headless Chromium, looks for the price-like text
 * rendered with the largest font size, and prints it to the shell.
 */
(async () => {
    const browser = await puppeteer.launch({
        executablePath: '/usr/bin/chromium-browser',
        headless: true
    });

    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 926 });
        await page.goto(bookingUrl);

        // The callback below runs inside the page, not in Node. A console.log
        // in there goes to the browser console, so the result has to be
        // RETURNED from the callback to become visible in the shell.
        const priceText = await page.evaluate(() => {
            const elements = [...document.querySelectorAll(' body *')];

            // Describe an element by its trimmed text and position; fontSize is
            // only recorded for short texts that are not placed at (0, 0).
            function createRecordFromElement(element) {
                const text = element.textContent.trim();
                const record = {};
                const bBox = element.getBoundingClientRect();

                if (text.length <= 30 && !(bBox.x == 0 && bBox.y == 0)) {
                    record['fontSize'] = parseInt(getComputedStyle(element)['fontSize'], 10);
                }
                record['y'] = bBox.y;
                record['x'] = bBox.x;
                record['text'] = text;
                return record;
            }

            // A price candidate sits in the top 600px, has a font size and its
            // whole text matches the price pattern.
            function canBePrice(record) {
                if (record['y'] > 600 ||
                    record['fontSize'] == undefined ||
                    !record['text'].match(/(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/)
                ) {
                    return false;
                }
                return true;
            }

            const possiblePriceRecords = elements.map(createRecordFromElement).filter(canBePrice);

            // The comparator must return a number (negative / zero / positive),
            // not a boolean: largest font first, ties broken by smaller y.
            const priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
                if (a['fontSize'] !== b['fontSize']) {
                    return b['fontSize'] - a['fontSize'];
                }
                return a['y'] - b['y'];
            });

            // null signals "nothing found", e.g. when the site served a
            // bot-blocking page instead of the product page.
            return priceRecordsSortedByFontSize.length > 0
                ? priceRecordsSortedByFontSize[0]['text']
                : null;
        });

        if (priceText === null) {
            console.log('No price candidates found');
        } else {
            console.log(priceText);
        }
    } finally {
        // Always shut Chromium down, otherwise the Node process never exits.
        await browser.close();
    }
})().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});