Node.js 木偶演员不';t提取所有元素
我正在用NodeJS编写一个脚本,以提取过去24小时内交易量最大的加密货币。 我想提取数组中Name、ticker和24h percentage的列,如下所示:Node.js 木偶演员不';t提取所有元素,node.js,puppeteer,Node.js,Puppeteer,我正在用NodeJS编写一个脚本,以提取过去24小时内交易量最大的加密货币。 我想提取数组中Name、ticker和24h percentage的列,如下所示: [{ name: 'Bitcoin', ticker: 'BTC', percentage: '20.62%' }, { name: 'Ethereum', ticker: 'ETH', percentage: '10.19%' }, ... ] 我的脚本看起来像这样,但一旦执行它,它将跳过一些行。 有人知道为什么它会随机跳过一些行吗
[{ name: 'Bitcoin', ticker: 'BTC', percentage: '20.62%' },
{ name: 'Ethereum', ticker: 'ETH', percentage: '10.19%' },
...
]
我的脚本看起来像这样,但一旦执行它,它将跳过一些行。
有人知道为什么它会随机跳过一些行吗?有更好的方法吗
let cryptoData = []
const browser = await puppeteer.launch({ args: ['--no-sandbox'], headless: true })
const page = await browser.newPage()
await page.setViewport({ width: 1536, height: 850 })
await page.goto('https://coinmarketcap.com/', { waitUntil: 'networkidle2' })
// Wait for tickers table to fully load
await page.waitForSelector('tr:nth-child(1) > td > .cmc-link > .sc-16r8icm-0 > .sc-16r8icm-0 > .sc-1eb5slv-0')
// Sort the list 24h descending
await page.waitForSelector('.stickyTop:nth-child(5) > div > .sc-9dqrx-0 > .sc-9dqrx-1 > .sc-1eb5slv-0')
await page.click('.stickyTop:nth-child(5) > div > .sc-9dqrx-0 > .sc-9dqrx-1 > .sc-1eb5slv-0')
// Wait for tickers table to fully load
await page.waitForSelector('tr:nth-child(1) > td > .cmc-link > .sc-16r8icm-0 > .sc-16r8icm-0 > .sc-1eb5slv-0')
let data = await page.evaluate(() => {
let tempData = []
for (let index = 1; index <= 100; index++) {
let name = document.querySelector(`tr:nth-child(${index}) > td > .cmc-link > .sc-16r8icm-0 > .sc-16r8icm-0 > .sc-1eb5slv-0`)
let ticker = document.querySelector(`tr:nth-child(${index}) > td > .cmc-link > .sc-16r8icm-0 > .sc-16r8icm-0 > .sc-1teo54s-2 > .sc-1eb5slv-0`)
let percentage = document.querySelector(`.cmc-table > tbody > tr:nth-child(${index}) > td > .iqsl6q-0`)
if (name && ticker && percentage) {
name = name.innerText
tempData.push({
id: index,
name,
ticker,
percentage,
})
}
}
return tempData
})
console.log(data)
await browser.close()
let cryptoData=[]
const browser=wait puppeter.launch({args:['--no sandbox'],headless:true})
const page=wait browser.newPage()
等待page.setViewport({宽度:1536,高度:850})
等待页面。转到('https://coinmarketcap.com/“,{waitUntil:'networkidle2'})
//等待tickers表完全加载
等待page.waitForSelector('tr:nth child(1)>td>.cmc link>.sc-16r8icm-0>.sc-16r8icm-0>.sc-1eb5slv-0'))
//按24小时降序排列列表
等待page.waitForSelector('.stickyTop:n子项(5)>div>.sc-9dqrx-0>.sc-9dqrx-1>.sc-1eb5slv-0')
等待页面。单击('.stickyTop:nth child(5)>div>.sc-9dqrx-0>.sc-9dqrx-1>.sc-1eb5slv-0')
//等待tickers表完全加载
等待page.waitForSelector('tr:nth child(1)>td>.cmc link>.sc-16r8icm-0>.sc-16r8icm-0>.sc-1eb5slv-0'))
让数据=等待页面。评估(()=>{
让tempData=[]
对于(让index=1;index td>.cmc link>.sc-16r8icm-0>.sc-16r8icm-0>.sc-1eb5slv-0`)
让ticker=document.querySelector(`tr:nth child(${index})>td>.cmc link>.sc-16r8icm-0>.sc-16r8icm-0>.sc-1teo54s-2>.sc-1eb5slv-0`)
让百分比=document.querySelector(`.cmc table>tbody>tr:nth child(${index})>td>.iqsl6q-0`)
if(名称、股票代码和百分比){
name=name.innerText
tempData.push({
id:索引,
名称
股票代码,
百分比,
})
}
}
返回临时数据
})
console.log(数据)
等待浏览器关闭()
问题在于,Puppeter没有加载整个页面,需要滚动到页面底部以加载延迟加载数据
为此,我使用了另一个用户的回复:
问题是Puppeter没有加载整个页面,需要滚动到页面底部以加载延迟加载数据 为此,我使用了另一个用户的回复:
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.goto('https://www.yoursite.com');
await page.setViewport({
width: 1200,
height: 800
});
await autoScroll(page);
await page.screenshot({
path: 'yoursite.png',
fullPage: true
});
await browser.close();
})();
async function autoScroll(page){
await page.evaluate(async () => {
await new Promise((resolve, reject) => {
var totalHeight = 0;
var distance = 100;
var timer = setInterval(() => {
var scrollHeight = document.body.scrollHeight;
window.scrollBy(0, distance);
totalHeight += distance;
if(totalHeight >= scrollHeight){
clearInterval(timer);
resolve();
}
}, 100);
});
});
}