Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/javascript/422.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Javascript 如何使用Puppeter改进许多URL的截图脚本?_Javascript_Node.js_Puppeteer - Fatal编程技术网

Javascript 如何使用Puppeter改进许多URL的截图脚本?

Javascript 如何使用Puppeter改进许多URL的截图脚本?,javascript,node.js,puppeteer,Javascript,Node.js,Puppeteer,我声明我不是一个专业的程序员:),我已经尝试了大约一个月的脚本开发,一次制作许多URL的屏幕截图,并将它们保存在本地计算机上 我将在脚本中执行以下操作: const puppeteer = require('puppeteer'); const os = require('os'); const username = require('username'); //I identify the operating system and the architect of the CPU to ru

我声明我不是一个专业的程序员:),我已经尝试了大约一个月的脚本开发,一次制作许多URL的屏幕截图,并将它们保存在本地计算机上

我将在脚本中执行以下操作:

const puppeteer = require('puppeteer');
const os = require('os');
const username = require('username');

//I identify the operating system and the architect of the CPU to run the Google Chrome Patch
var architetturaCPU = os.arch();
var sistemaOperativo = os.type();
console.log('System OS: '+sistemaOperativo+' '+architetturaCPU);
//console.log(os.platform());
 
// Device width and height - Desktop / Tablet / Mobile
const device_width = 1920;
const device_height = 1080;

//Patch di Chrome
var systemPath = '';
if (sistemaOperativo == 'Darwin'){
    console.log('Chrome for MacOS');
    var systemPath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
} else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x64'){
    console.log('Chrome for Windows 64bit');
    var systemPath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'ia32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}


//I build an Array and insert all the buttons for the consent of the Cookies for the Network sites
const cookie1 = 'button.amecp_button-accetto.iubenda-cs-close-btn';
const cookie2 = 'button.iubenda-cs-accept-btn.iubenda-cs-btn-primary';
var BtnCookie = [
  cookie1, 
  cookie2
];


(async function () {

//I read the url file
var fs = require('fs');
var urlArray = fs.readFileSync('url-list.js').toString().split("\n").filter(a => a);

//Launch Puppeteer
const browser = await puppeteer.launch({
  headless: true,
  executablePath: systemPath,
  args: ['--disable-dev-shm-usage','--no-sandbox']});


//Loop through all the url-list.js URL
var contaUrl = 0;
for(var i = 0; i < urlArray.length; i++){

//Check if empty spaces are present in the url file list
if (urlArray[i].indexOf("http") != '-1'){    

//I open the boswser, delete the cache and set the page size
const page = await browser.newPage();
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');

await page.setCacheEnabled(false);
await page.setViewport({width: device_width, height: device_height});
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');

//Tell me which URL you are working on
console.log(' ');
console.log('\x1b[33m%s','Open URL > '+urlArray[i],'\x1b[0m');
console.log(' ');
await page.goto(urlArray[i],{waitUntil:'networkidle2'}); 
await page.waitFor(20000);

//Find the class / id of the button on the page to accept cookies
var contaNumeroValoriBtnCookie = BtnCookie.length;
for(var n = 0; n <= BtnCookie.length; n++){

if (await page.$(BtnCookie[n]) !== null ) {
  
console.log(BtnCookie[n]);
const navigationPromise = page.waitForSelector(BtnCookie[n]);
await page.click(BtnCookie[n]); 
await navigationPromise; 

    console.log('\x1b[32m%s', 'Bypass Cookie... OK!','\x1b[0m');
    break;
    }else if (n == contaNumeroValoriBtnCookie) {
     
        console.log('\x1b[31m%s', 'Cookie not found!','\x1b[0m');
    }else {
    
        //console.log('I'm looking for the cookie...');
    }

} //end - Find the class / id of the button on the page to accept cookies



//Scroll the entire page to load the content
await autoScroll(page);

async function autoScroll(page){
  await page.evaluate(async () => {
      await new Promise((resolve, reject) => {
          var totalHeight = 0;
          var distance  = 100;
         
          var timer = setInterval(() => {
              var scrollHeight = document.body.scrollHeight;
              window.scrollBy(0, distance);
              totalHeight += distance;

              if(totalHeight >= scrollHeight){
                  clearInterval(timer);
                  resolve();
              }
          }, 300);
      });
  });
}

//Go back to the top of the page
await page.evaluate(_ => {window.scrollTo(0, 0);});

await page.waitFor(10000);

//I clean up the URL before saving the file
var str = urlArray[i];
str = str.replace(/[^\w]+/ig,'-');
var convertiUrl = str;

//SAVE screenshot
await page.screenshot({path: './screenshot/'+convertiUrl+i+'.jpg', fullPage: true});
await page.waitFor(5000);

//Register through an external site, on which url the script is most used
if (contaUrl < urlArray.length){
const urlCeck = new URL(urlArray[i]);
var impressionPage = "https://lab.servername.it/track/index1.php?id=6dfc76542a4817a-368/-/"+urlCeck.hostname+"/-/impressionPage";
await page.goto(impressionPage ,{waitUntil:'load'});
}

contaUrl++;

//I register via an external site, the username that used the script and count how many times the script was used in total
if (contaUrl >= urlArray.length){
var userNameScript = await username();
var impressionNomeUtente = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/"+userNameScript+"/-/userNameScript";
var impressionScript = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/script_impression/-/script_impression";
await page.goto(impressionNomeUtente, {waitUntil:'load'});
await page.goto(impressionScript, {waitUntil:'load'});
}

await page.close();

}//end if (urlArray[i].indexOf("http") != '-1'){  
  
}//fine loop

browser.close();

console.log(' ');
console.log('\x1b[32m%s', contaUrl+' all screenshot saved :)','\x1b[0m');
console.log(' ');

})(); //end script
1-我从Url list.js文件打开Url

2-我等待页面加载

3-我做了一个检查,看看我的页面上是否存在同意隐私的按钮。如果是 存在,单击它并等待加载广告格式

4-我做了一个完整的页面滚动加载所有的图像

5-将截图保存在我的电脑上

6-通过外部站点注册脚本的使用次数

我时不时会遇到什么,我不明白的是:

const puppeteer = require('puppeteer');
const os = require('os');
const username = require('username');

//I identify the operating system and the architect of the CPU to run the Google Chrome Patch
var architetturaCPU = os.arch();
var sistemaOperativo = os.type();
console.log('System OS: '+sistemaOperativo+' '+architetturaCPU);
//console.log(os.platform());
 
// Device width and height - Desktop / Tablet / Mobile
const device_width = 1920;
const device_height = 1080;

//Patch di Chrome
var systemPath = '';
if (sistemaOperativo == 'Darwin'){
    console.log('Chrome for MacOS');
    var systemPath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
} else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x64'){
    console.log('Chrome for Windows 64bit');
    var systemPath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'ia32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}


//I build an Array and insert all the buttons for the consent of the Cookies for the Network sites
const cookie1 = 'button.amecp_button-accetto.iubenda-cs-close-btn';
const cookie2 = 'button.iubenda-cs-accept-btn.iubenda-cs-btn-primary';
var BtnCookie = [
  cookie1, 
  cookie2
];


(async function () {

//I read the url file
var fs = require('fs');
var urlArray = fs.readFileSync('url-list.js').toString().split("\n").filter(a => a);

//Launch Puppeteer
const browser = await puppeteer.launch({
  headless: true,
  executablePath: systemPath,
  args: ['--disable-dev-shm-usage','--no-sandbox']});


//Loop through all the url-list.js URL
var contaUrl = 0;
for(var i = 0; i < urlArray.length; i++){

//Check if empty spaces are present in the url file list
if (urlArray[i].indexOf("http") != '-1'){    

//I open the boswser, delete the cache and set the page size
const page = await browser.newPage();
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');

await page.setCacheEnabled(false);
await page.setViewport({width: device_width, height: device_height});
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');

//Tell me which URL you are working on
console.log(' ');
console.log('\x1b[33m%s','Open URL > '+urlArray[i],'\x1b[0m');
console.log(' ');
await page.goto(urlArray[i],{waitUntil:'networkidle2'}); 
await page.waitFor(20000);

//Find the class / id of the button on the page to accept cookies
var contaNumeroValoriBtnCookie = BtnCookie.length;
for(var n = 0; n <= BtnCookie.length; n++){

if (await page.$(BtnCookie[n]) !== null ) {
  
console.log(BtnCookie[n]);
const navigationPromise = page.waitForSelector(BtnCookie[n]);
await page.click(BtnCookie[n]); 
await navigationPromise; 

    console.log('\x1b[32m%s', 'Bypass Cookie... OK!','\x1b[0m');
    break;
    }else if (n == contaNumeroValoriBtnCookie) {
     
        console.log('\x1b[31m%s', 'Cookie not found!','\x1b[0m');
    }else {
    
        //console.log('I'm looking for the cookie...');
    }

} //end - Find the class / id of the button on the page to accept cookies



//Scroll the entire page to load the content
await autoScroll(page);

async function autoScroll(page){
  await page.evaluate(async () => {
      await new Promise((resolve, reject) => {
          var totalHeight = 0;
          var distance  = 100;
         
          var timer = setInterval(() => {
              var scrollHeight = document.body.scrollHeight;
              window.scrollBy(0, distance);
              totalHeight += distance;

              if(totalHeight >= scrollHeight){
                  clearInterval(timer);
                  resolve();
              }
          }, 300);
      });
  });
}

//Go back to the top of the page
await page.evaluate(_ => {window.scrollTo(0, 0);});

await page.waitFor(10000);

//I clean up the URL before saving the file
var str = urlArray[i];
str = str.replace(/[^\w]+/ig,'-');
var convertiUrl = str;

//SAVE screenshot
await page.screenshot({path: './screenshot/'+convertiUrl+i+'.jpg', fullPage: true});
await page.waitFor(5000);

//Register through an external site, on which url the script is most used
if (contaUrl < urlArray.length){
const urlCeck = new URL(urlArray[i]);
var impressionPage = "https://lab.servername.it/track/index1.php?id=6dfc76542a4817a-368/-/"+urlCeck.hostname+"/-/impressionPage";
await page.goto(impressionPage ,{waitUntil:'load'});
}

contaUrl++;

//I register via an external site, the username that used the script and count how many times the script was used in total
if (contaUrl >= urlArray.length){
var userNameScript = await username();
var impressionNomeUtente = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/"+userNameScript+"/-/userNameScript";
var impressionScript = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/script_impression/-/script_impression";
await page.goto(impressionNomeUtente, {waitUntil:'load'});
await page.goto(impressionScript, {waitUntil:'load'});
}

await page.close();

}//end if (urlArray[i].indexOf("http") != '-1'){  
  
}//fine loop

browser.close();

console.log(' ');
console.log('\x1b[32m%s', contaUrl+' all screenshot saved :)','\x1b[0m');
console.log(' ');

})(); //end script
  • 有时,我可以找到接受页面上coockies的按钮,即使它存在于Cookie数组中
  • 有时,找到接受隐私同意的按钮的时间用完了,然后脚本停止
  • 有时脚本崩溃我不知道为什么。。。也许它会使记忆饱和
我恳请您帮助我理解我是否写错了这个脚本,或者是否有可能在某些方面改进它。:)

文件screenshot.js:

const puppeteer = require('puppeteer');
const os = require('os');
const username = require('username');

//I identify the operating system and the architect of the CPU to run the Google Chrome Patch
var architetturaCPU = os.arch();
var sistemaOperativo = os.type();
console.log('System OS: '+sistemaOperativo+' '+architetturaCPU);
//console.log(os.platform());
 
// Device width and height - Desktop / Tablet / Mobile
const device_width = 1920;
const device_height = 1080;

//Patch di Chrome
var systemPath = '';
if (sistemaOperativo == 'Darwin'){
    console.log('Chrome for MacOS');
    var systemPath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
} else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x64'){
    console.log('Chrome for Windows 64bit');
    var systemPath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'ia32'){
    console.log('Chrome for Windows 32bit');
    var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
}


//I build an Array and insert all the buttons for the consent of the Cookies for the Network sites
const cookie1 = 'button.amecp_button-accetto.iubenda-cs-close-btn';
const cookie2 = 'button.iubenda-cs-accept-btn.iubenda-cs-btn-primary';
var BtnCookie = [
  cookie1, 
  cookie2
];


(async function () {

//I read the url file
var fs = require('fs');
var urlArray = fs.readFileSync('url-list.js').toString().split("\n").filter(a => a);

//Launch Puppeteer
const browser = await puppeteer.launch({
  headless: true,
  executablePath: systemPath,
  args: ['--disable-dev-shm-usage','--no-sandbox']});


//Loop through all the url-list.js URL
var contaUrl = 0;
for(var i = 0; i < urlArray.length; i++){

//Check if empty spaces are present in the url file list
if (urlArray[i].indexOf("http") != '-1'){    

//I open the boswser, delete the cache and set the page size
const page = await browser.newPage();
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');

await page.setCacheEnabled(false);
await page.setViewport({width: device_width, height: device_height});
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');

//Tell me which URL you are working on
console.log(' ');
console.log('\x1b[33m%s','Open URL > '+urlArray[i],'\x1b[0m');
console.log(' ');
await page.goto(urlArray[i],{waitUntil:'networkidle2'}); 
await page.waitFor(20000);

//Find the class / id of the button on the page to accept cookies
var contaNumeroValoriBtnCookie = BtnCookie.length;
for(var n = 0; n <= BtnCookie.length; n++){

if (await page.$(BtnCookie[n]) !== null ) {
  
console.log(BtnCookie[n]);
const navigationPromise = page.waitForSelector(BtnCookie[n]);
await page.click(BtnCookie[n]); 
await navigationPromise; 

    console.log('\x1b[32m%s', 'Bypass Cookie... OK!','\x1b[0m');
    break;
    }else if (n == contaNumeroValoriBtnCookie) {
     
        console.log('\x1b[31m%s', 'Cookie not found!','\x1b[0m');
    }else {
    
        //console.log('I'm looking for the cookie...');
    }

} //end - Find the class / id of the button on the page to accept cookies



//Scroll the entire page to load the content
await autoScroll(page);

async function autoScroll(page){
  await page.evaluate(async () => {
      await new Promise((resolve, reject) => {
          var totalHeight = 0;
          var distance  = 100;
         
          var timer = setInterval(() => {
              var scrollHeight = document.body.scrollHeight;
              window.scrollBy(0, distance);
              totalHeight += distance;

              if(totalHeight >= scrollHeight){
                  clearInterval(timer);
                  resolve();
              }
          }, 300);
      });
  });
}

//Go back to the top of the page
await page.evaluate(_ => {window.scrollTo(0, 0);});

await page.waitFor(10000);

//I clean up the URL before saving the file
var str = urlArray[i];
str = str.replace(/[^\w]+/ig,'-');
var convertiUrl = str;

//SAVE screenshot
await page.screenshot({path: './screenshot/'+convertiUrl+i+'.jpg', fullPage: true});
await page.waitFor(5000);

//Register through an external site, on which url the script is most used
if (contaUrl < urlArray.length){
const urlCeck = new URL(urlArray[i]);
var impressionPage = "https://lab.servername.it/track/index1.php?id=6dfc76542a4817a-368/-/"+urlCeck.hostname+"/-/impressionPage";
await page.goto(impressionPage ,{waitUntil:'load'});
}

contaUrl++;

//I register via an external site, the username that used the script and count how many times the script was used in total
if (contaUrl >= urlArray.length){
var userNameScript = await username();
var impressionNomeUtente = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/"+userNameScript+"/-/userNameScript";
var impressionScript = "https://lab.servername.it/track/index1.php?id=5a98b9a86118ae6-288/-/script_impression/-/script_impression";
await page.goto(impressionNomeUtente, {waitUntil:'load'});
await page.goto(impressionScript, {waitUntil:'load'});
}

await page.close();

}//end if (urlArray[i].indexOf("http") != '-1'){  
  
}//fine loop

browser.close();

console.log(' ');
console.log('\x1b[32m%s', contaUrl+' all screenshot saved :)','\x1b[0m');
console.log(' ');

})(); //end script
const puppeter=require('puppeter');
const os=要求('os');
const username=require('username');
//我确定了运行Google Chrome补丁的操作系统和CPU架构
var architetturaCPU=os.arch();
var sistemoperativo=os.type();
log('系统操作系统:'+sistemoperativo+''+architecturaturacpu);
//console.log(os.platform());
//设备宽度和高度-桌面/平板电脑/手机
常数装置_宽度=1920;
常数装置高度=1080;
//双铬贴片
var systemPath='';
如果(SISTEMAOPERTIVO==‘达尔文’){
console.log('Chrome for MacOS');
var systemPath='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
}否则
如果(SISTEMAOPERTIVO=='Windows\u NT'&&ArchiteTuraCPU=='x64'){
log('chromeforwindows64位');
var systemPath='C:\\Program Files(x86)\\Google\\Chrome\\Application\\Chrome.exe';
}否则
如果(系统操作=='Windows\u NT'&&ArchiteTuraCPU=='x32'){
log('Chrome for Windows 32位');
var systemPath='C:\\Program Files\\Google\\Chrome\\Application\\Chrome.exe';
}否则
如果(SISTEMAOPERTIVO=='Windows\u NT'&&ArchiteTuraCPU=='ia32'){
log('Chrome for Windows 32位');
var systemPath='C:\\Program Files\\Google\\Chrome\\Application\\Chrome.exe';
}
//我构建一个数组并插入所有按钮,以获得网络站点cookie的同意
const cookie1='button.amecp_button-accetto.iubenda cs close btn';
const cookie2='button.iubenda cs accept btn.iubenda cs btn primary';
变量BtnCookie=[
库克1,
库克2
];
(异步函数(){
//我读了url文件
var fs=需要('fs');
var urlArray=fs.readFileSync('url-list.js').toString().split(“\n”).filter(a=>a);
//发射木偶演员
const browser=wait puppeter.launch({
无头:是的,
可执行路径:系统路径,
参数:['--disable dev shm usage','--no sandbox']});
//循环浏览所有url-list.js url
var contaUrl=0;
对于(var i=0;i'+urlArray[i],'\x1b[0m');
控制台日志(“”);
wait page.goto(urlArray[i],{waitUntil:'networkidle2'});
等待页面。等待(20000);
//在页面上查找按钮的类别/id以接受Cookie
var contaNumeroValoriBtnCookie=BtnCookie.length;

对于(var n=0;n您的一些问题可以在这里得到回答,但是这个问题的格式和这么多的脚本相当于一个代码审查请求,它在堆栈交换上有自己的平台:(好的,它需要一个无错误的代码)。我建议-下次-只共享相关代码,或者共享脚本的伪代码版本,使其具有更一般的外观,以便其他人可以发现问题+答案更有用。“有时脚本崩溃我不知道为什么”您可以共享有关这些情况的错误消息吗?一点问题也没有:)还没有,因为我在url后面放了我的评论:他们希望在代码审查时可以使用代码,这可能只需要一些小的重构。所以你的问题在这里更好(我在评论后也意识到了这一点)。但我希望你能感觉到这个问题可以改进。这次如果你提供更多关于错误消息的信息,我可以看看(同时希望社区中一些严格的成员不要标记整个问题,因为你刚刚删除了你的评论。我回复了hi@theDavidBarton,对不起,我正在考虑取消这个问题并取消了评论。所以让我们这样做吧,一旦脚本返回错误,我会将其添加到帖子中并给你留下评论!感谢支持..:)您好@theDavidBarton,为什么有些截图的宽度要大一些?我设置了一个1920px的宽度值…您的一些问题可以在这里得到回答,但是这个包含大量脚本的问题的格式是一个代码复查请求,它在堆栈交换上有自己的平台:(好的,它需要一个无错误的代码)