Javascript 一个关于鼠标动作的木偶问题没有';行不通

Javascript 一个关于鼠标动作的木偶问题没有';行不通,javascript,node.js,puppeteer,Javascript,Node.js,Puppeteer,我正在使用Puppeter尝试获取有关该页面的一些信息,但我遇到了一个反爬虫页面。我可以通过在实际页面上拖动滑块来解决这个问题,但木偶演员的鼠标模拟似乎并不能解决这个问题。所以我来这里是想问如何解决这个问题。 这是我的密码: userAgent.js const userAgents=[ “Mozilla/5.0(X11;U;Linux i686;en-US;rv:1.8.0.12)Gecko/20070731 Ubuntu/dapper security Firefox/1.5.0.12”,

我正在使用Puppeter尝试获取有关该页面的一些信息,但我遇到了一个反爬虫页面。我可以通过在实际页面上拖动滑块来解决这个问题,但木偶演员的鼠标模拟似乎并不能解决这个问题。所以我来这里是想问如何解决这个问题。 这是我的密码: userAgent.js

const userAgents=[
“Mozilla/5.0(X11;U;Linux i686;en-US;rv:1.8.0.12)Gecko/20070731 Ubuntu/dapper security Firefox/1.5.0.12”,
“Mozilla/4.0(兼容;MSIE 7.0;Windows NT 6.0;Acoo浏览器;SLCC1;.NET CLR 2.0.50727;Media Center PC 5.0;.NET CLR 3.0.04506)”,
“Mozilla/5.0(Windows NT 6.1;WOW64)AppleWebKit/535.11(KHTML,类似Gecko)Chrome/17.0.963.56 Safari/535.11”,
“Mozilla/5.0(Macintosh;英特尔Mac OS X 10_7_3)AppleWebKit/535.20(KHTML,类似Gecko)Chrome/19.0.1036.7 Safari/535.20”,
'Mozilla/5.0(X11;U;Linux i686;en-US;rv:1.9.0.8)Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
“Mozilla/5.0(Windows NT 6.1;WOW64)AppleWebKit/537.1(KHTML,类似Gecko)Chrome/21.0.1180.71 Safari/537.1 LBBROWSER”,
'Mozilla/5.0(兼容;MSIE 9.0;Windows NT 6.1;Win64;x64;Trident/5.0;.NET CLR 3.5.30729;.NET CLR 3.0.30729;.NET CLR 2.0.50727;Media Center PC 6.0),Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
“Mozilla/4.0(兼容;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727)”,
“Mozilla/5.0(兼容;MSIE 9.0;Windows NT 6.1;WOW64;Trident/5.0;SLCC2;.NET CLR 2.0.50727;.NET CLR 3.5.30729;.NET CLR 3.0.30729;Media Center PC 6.0;.NET4.0C;.NET4.0E;QQBrowser/7.0.3698.400)”,
“Mozilla/4.0(兼容;MSIE 6.0;Windows NT 5.1;SV1;QQDownload 732;.NET4.0C;.NET4.0E)”,
“Mozilla/5.0(Windows NT 6.1;Win64;x64;rv:2.0b13pre)Gecko/20110307 Firefox/4.0b13pre”,
“Opera/9.80(Macintosh;英特尔Mac OS X 10.6.8;U;fr)Presto/2.9.168版本/11.52”,
“Mozilla/5.0(X11;U;Linux i686;en-US;rv:1.8.0.12)Gecko/20070731 Ubuntu/dapper security Firefox/1.5.0.12”,
“Mozilla/5.0(兼容;MSIE 9.0;Windows NT 6.1;WOW64;Trident/5.0;SLCC2;.NET CLR 2.0.50727;.NET CLR 3.5.30729;.NET CLR 3.0.30729;Media Center PC 6.0;.NET4.0C;.NET4.0E;LBBROWSER)”,
'Mozilla/5.0(X11;U;Linux i686;en-US;rv:1.9.0.8)Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
'Mozilla/5.0(X11;U;Linux;en-US)AppleWebKit/527+(KHTML,如Gecko、Safari/419.3)Arora/0.6',
“Mozilla/5.0(兼容;MSIE 9.0;Windows NT 6.1;WOW64;Trident/5.0;SLCC2;.NET CLR 2.0.50727;.NET CLR 3.5.30729;.NET CLR 3.0.30729;Media Center PC 6.0;.NET4.0C;.NET4.0E;QQBrowser/7.0.3698.400)”,
“Opera/9.25(Windows NT 5.1;U;en),Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9”,
“Mozilla/5.0(Windows NT 10.0;WOW64)AppleWebKit/537.36(KHTML,类似Gecko)Chrome/61.0.3163.100 Safari/537.36”,
'Mozilla/5.0(Macintosh;英特尔Mac OS X 10_14_6)AppleWebKit/537.36(KHTML,类似Gecko)Chrome/80.0.3987.149 Safari/537.36'
];
module.exports={
用户代理
};
htmlService.js

const puppeteer = require('puppeteer');
const {userAgents} = require('./userAgent')

let puppeteerBrowser = null;

async function getHtmlContent(url) {
    const userAgent = userAgents[parseInt(Math.random() * userAgents.length)];
    const headers = {
        'User-Agent': userAgent,
        Accept:
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'no-cache',
        Connection: 'keep-alive',
        Cookie:
            'zh_choose=s; zh_choose=s; _gscu_1575893663=56268672godhjb19; _gscbrs_1575893663=1; _pk_id.52.d4c1=ca0b23081b215ee0.1556268673.2.1584608708.1584608646.; _pk_ref.52.d738=%5B%22%22%2C%22%22%2C1584838630%2C%22http%3A%2F%2Flocalhost%3A8888%2F%22%5D; _pk_ses.52.d738=*; _gscs_1575893663=t84838630uv0y6117|pv:3; _pk_id.52.d738=54fee0e32da49e39.1584266678.6.1584838693.1584838630.',
        Pragma: 'no-cache',
        'Upgrade-Insecure-Requests': 1,
    };
    if (!puppeteerBrowser) {
        puppeteerBrowser = await puppeteer.launch({
            headless: false,
            slowMo: 250,
            args: ['-no-sandbox', '--window-size=1920,1080'],
        }, {waitUntil: 'domcontentloaded'});
    }
    let page = await puppeteerBrowser.newPage();
    await page.goto(url);
    await page.evaluate(async () => {
        Object.defineProperty(navigator, 'webdriver', {get: () => false})
    });
    await page.waitFor(1000);
    const sliderElement = await page.$('#nc_1_n1z.nc_iconfont.btn_slide');
    const sliderHandle = await page.$('#nc_1__scale_text.scale_text');
    await page.evaluate(() => {
        let reportMousePosition = function(action) {
            return function(e){
                let mouseX = e.offsetX;
                let mouseY = e.offsetY;
                console.log(action, mouseX, mouseY);
            }
        }
        document.onmousemove = reportMousePosition('move');
        document.onmousedown = reportMousePosition('down');
        document.onmouseup = reportMousePosition('up');
    })
    if (sliderElement && sliderHandle) {
        console.log('here');
        const slider = await sliderElement.boundingBox();
        const handle = await sliderHandle.boundingBox();
        console.log([handle.x + handle.width / 2, handle.y + handle.height / 2])
        await page.mouse.move(handle.x + handle.width / 2, handle.y + handle.height / 2)
        await page.waitFor(10);
        await page.mouse.down()
        await page.waitFor(10);
        for (let i = 0; i < slider.width; i += 10) {
            for (let j = 0; j < handle.height; j += 10) {
                await page.mouse.move(handle.x + i, handle.y + j)
            }
        }
        await page.waitFor(10);
        await page.mouse.up()
        console.log([handle.x + slider.width, handle.y + handle.height / 2])
    }
    await page.waitFor(300);
    const html = await page.content();
    return html;
}

module.exports = {
    getHtmlContent
};

您在访问时会经常遇到它。

所以问题是:鼠标移动并不能阻止您被识别为爬虫?在这种情况下,它与木偶演员的鼠标动作无关,而是自动检测浏览器。是的,我认为他们已经采取了一些措施。这两者在鼠标移动方面有什么不同吗@DAVIDBARTON两者都与浏览器通信,我认为在这种情况下没有区别。但我认为爬虫检测是在不同的层面上发生的,当你开始移动鼠标时,它已经检测到你了。你应该尝试另一种方法来假装自己是一个真正的用户。