JavaScript：Puppeteer 的鼠标操作模拟不起作用
我正在使用 Puppeteer 尝试获取某个页面的一些信息，但遇到了一个反爬虫页面。在真实页面上手动拖动滑块可以通过验证，但 Puppeteer 的鼠标模拟似乎无法通过验证。所以我来这里是想问如何解决这个问题。（标签：javascript、node.js、puppeteer）这是我的代码： userAgent.js
/**
 * Pool of browser User-Agent strings; callers pick one at random per request.
 *
 * Every entry is a plain ASCII string literal. Duplicated entries are kept
 * on purpose so the selection weights of the original list are unchanged.
 *
 * NOTE(review): these strings were restored from a machine-translated paste
 * (typographic quotes, translated tokens such as "compatible" / "like Gecko" /
 * "Intel", and stripped spaces). Spacing follows the conventional UA format —
 * confirm against the original list if exact bytes matter.
 */
const userAgents = [
  'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
  'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
  'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
  'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0), Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
  'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre',
  'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
  'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
  'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)',
  'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
  'Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6',
  'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
  'Opera/9.25 (Windows NT 5.1; U; en), Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
  'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
];
module.exports={
用户代理
};
htmlService.js
const puppeteer = require('puppeteer');
const {userAgents} = require('./userAgent')
// Browser instance shared by every getHtmlContent() call; launched lazily.
let puppeteerBrowser = null;

/**
 * Resolve after `ms` milliseconds.
 * Plain-timer replacement for the deprecated `page.waitFor(ms)`.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Log mouse move/down/up coordinates to the page console (debug aid).
 * Uses addEventListener so handlers the page itself assigned to
 * `document.onmouse*` are not overwritten.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function installMouseReporter(page) {
  await page.evaluate(() => {
    const reportMousePosition = (action) => (e) => {
      console.log(action, e.offsetX, e.offsetY);
    };
    document.addEventListener('mousemove', reportMousePosition('move'));
    document.addEventListener('mousedown', reportMousePosition('down'));
    document.addEventListener('mouseup', reportMousePosition('up'));
  });
}

/**
 * Drag the slider captcha knob across its track, if both elements exist.
 *
 * NOTE(review): assumes `#nc_1_n1z` (class `btn_slide`) is the draggable knob
 * and `#nc_1__scale_text` is the track it slides along, as their class names
 * suggest — confirm against the target page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} true when a drag was performed, false otherwise.
 */
async function dragSliderIfPresent(page) {
  const knobElement = await page.$('#nc_1_n1z.nc_iconfont.btn_slide');
  const trackElement = await page.$('#nc_1__scale_text.scale_text');
  if (!knobElement || !trackElement) {
    return false;
  }

  // boundingBox() resolves to null when the element is not visible.
  const knob = await knobElement.boundingBox();
  const track = await trackElement.boundingBox();
  if (!knob || !track) {
    return false;
  }

  const startX = knob.x + knob.width / 2;
  const startY = knob.y + knob.height / 2;
  const endX = startX + track.width;
  console.log([startX, startY]);

  // Press on the knob itself, then move horizontally in ~10px increments.
  await page.mouse.move(startX, startY);
  await sleep(10);
  await page.mouse.down();
  await sleep(10);
  await page.mouse.move(endX, startY, {
    steps: Math.max(1, Math.ceil(track.width / 10)),
  });
  await sleep(10);
  await page.mouse.up();
  console.log([endX, startY]);
  return true;
}

/**
 * Load `url` in a new tab of the shared browser, try to pass the slider
 * captcha if one is shown, and return the page's HTML.
 *
 * The tab is given a random User-Agent from `userAgents` and a fixed set of
 * extra request headers, and `navigator.webdriver` is overridden before any
 * page script runs. The tab is closed before returning, on success or error.
 *
 * @param {string} url - Address to load.
 * @returns {Promise<string>} Serialized HTML of the page.
 */
async function getHtmlContent(url) {
  // Math.floor, not parseInt: parseInt stringifies its argument first.
  const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)];
  // Sent with every request of the page. All values must be strings.
  const extraHeaders = {
    Accept:
      'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
    Cookie:
      'zh_choose=s; zh_choose=s; _gscu_1575893663=56268672godhjb19; _gscbrs_1575893663=1; _pk_id.52.d4c1=ca0b23081b215ee0.1556268673.2.1584608708.1584608646.; _pk_ref.52.d738=%5B%22%22%2C%22%22%2C1584838630%2C%22http%3A%2F%2Flocalhost%3A8888%2F%22%5D; _pk_ses.52.d738=*; _gscs_1575893663=t84838630uv0y6117|pv:3; _pk_id.52.d738=54fee0e32da49e39.1584266678.6.1584838693.1584838630.',
    Pragma: 'no-cache',
    'Upgrade-Insecure-Requests': '1',
  };

  if (!puppeteerBrowser) {
    puppeteerBrowser = await puppeteer.launch({
      headless: false,
      slowMo: 250,
      args: ['--no-sandbox', '--window-size=1920,1080'],
    });
  }

  const page = await puppeteerBrowser.newPage();
  try {
    await page.setUserAgent(userAgent);
    await page.setExtraHTTPHeaders(extraHeaders);
    // Must be registered before navigation: overriding navigator.webdriver
    // after goto() is too late, the page's scripts have already read it.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });
    // waitUntil is a navigation option; puppeteer.launch() ignores it.
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    await sleep(1000);

    await installMouseReporter(page);
    if (await dragSliderIfPresent(page)) {
      console.log('here');
    }

    await sleep(300);
    return await page.content();
  } finally {
    // Each call opens its own tab in the shared browser; do not leak it.
    await page.close();
  }
}
// Public API of htmlService.js: the page-fetching entry point only.
module.exports = { getHtmlContent: getHtmlContent };
您在访问时会经常遇到它。所以问题是：鼠标移动并不能阻止您被识别为爬虫？在这种情况下，这与 Puppeteer 的鼠标操作无关，而是对方在检测自动化浏览器。——是的，我认为他们已经采取了一些措施。这两者在鼠标移动方面有什么不同吗？——@DAVIDBARTON 两者都是与浏览器通信，我认为在这种情况下没有区别。但我认为爬虫检测发生在另一个层面：当你开始移动鼠标时，它已经检测到你了。你应该尝试用另一种方法来伪装成真实用户。