Javascript PhantomJS Web抓取Cisco交换机Web界面

Javascript PhantomJS Web抓取Cisco交换机Web界面,javascript,jquery,ajax,phantomjs,Javascript,Jquery,Ajax,Phantomjs,我最近进入了PhantomJS,目前我的第一份开发工作就是使用PhantomJS 我的任务是从一个旧的Cisco catalyst 2960 x交换机通过局域网电缆连接到PC,从中获取网络交换机信息(主机名、产品ID、IP地址、MAC地址等) 我使用phantomJS headless浏览器获得了http认证,可以打开第一个切换页面,但它会导致一个启动页面,如下图所示 此启动页面仅在用户必须单击“继续”按钮后首次登录/访问交换机,该按钮具有如下所示的表单按钮输入属性。(顺便说一下,是用AJAX编

我最近进入了PhantomJS,目前我的第一份开发工作就是使用PhantomJS

我的任务是从一个旧的Cisco catalyst 2960 x交换机通过局域网电缆连接到PC,从中获取网络交换机信息(主机名、产品ID、IP地址、MAC地址等)

我使用 PhantomJS 无头(headless)浏览器通过了 HTTP 认证,可以打开交换机的第一个页面,但它会跳转到一个启动页面,如下图所示

此启动页面只在首次登录/访问交换机时出现,用户必须单击“继续”按钮才能进入下一步;该按钮是一个表单 input 按钮,其属性如下所示。(顺便说一下,页面是用 AJAX 编写的)

  • Frmwrkresource.htm
  • topbannernofpv.shtml
  • setup_report.htm
input 元素“button1”位于 setup_report.htm 框架内。按下“button1”时

setscookiesandLoadsCiscoDeviceManager();
会被调用

此函数调用仅存在于在startup_report和Cisco device manager(10.44.39.252/xhome.htm)之间转换时调用的所有javascript资源中的preflight.js中。我认为浏览器cookies是这个问题的主要部分

附件是我的源代码。它处于不同的完成阶段

// PhantomJS modules: `webpage` provides the headless browser page that the
// rest of the script drives; `fs` is loaded for file access.
var page = require('webpage').create();
var fs = require('fs');

console.log("\n:Welcome to my Crawler Scrapper:");

// Landing page of the switch's web interface.
var url = 'http://10.44.39.252/';

// HTTP Basic credentials, defined once. Previously the password given to
// page.settings ('----------') differed from the one encoded into the
// Authorization header, so the two authentication paths disagreed. The
// value the header was already sending is used for both.
// NOTE(review): credentials are hard-coded in the script; consider supplying
// them from outside (e.g. command-line arguments) instead.
var authUser = 'star';
var authPassword = 'xzsawq4321';

// Used by PhantomJS when the server answers with an HTTP auth challenge.
page.settings.userName = authUser;
page.settings.password = authPassword;
// Sent pre-emptively with every request made by the page.
page.customHeaders = {'Authorization': 'Basic ' + btoa(authUser + ':' + authPassword)};

page.settings.userAgent = 'PMG Web Crawler Bot/1.0';

// Trace every navigation the page attempts.
//   targetUrl     - URL the page is trying to navigate to
//   cause         - what triggered the navigation request
//   willNavigate  - whether the navigation will actually happen
//   fromMainFrame - whether the request came from the page's main frame
page.onNavigationRequested = function (targetUrl, cause, willNavigate, fromMainFrame) {
    var rule = "----------------------------------------------";
    var report = [
        "\n" + rule,
        "Navigation Request Information:\n",
        'Trying to navigate to: ' + targetUrl,
        'Caused by: ' + cause,
        'Will it actually navigate: ' + willNavigate,
        'Sent from the page\'s main frame: ' + fromMainFrame,
        rule + "\n"
    ];
    // One console.log call per entry, in order, as before.
    report.forEach(function (line) {
        console.log(line);
    });
};

// Report a resource that failed to load: its id and URL, followed by the
// error code and the human-readable description.
page.onResourceError = function (failure) {
    console.log("\nHold Up, We have Errors!");
    console.log("Resource Error Information: \n");

    var identity = 'Resoruce ErrorID:' + failure.id + '\nURL:' + failure.url;
    console.log(identity);

    var details = 'Resource Error Code: ' + failure.errorCode +
        '\nDescription: ' + failure.errorString;
    console.log(details);
};

// Relay console messages received from the page to the PhantomJS console,
// prefixed so they can be told apart from the script's own output.
page.onConsoleMessage = function (pageMessage) {
    var relayed = "The Browser Replied:" + pageMessage;
    console.log(relayed);
};

//////////////////////////////////////////////////////////////////
// Page-load lifecycle tracing: one line when a load begins and one
// when it completes.
page.onLoadStarted = function logLoadStarted() {
    console.log("Loadng Page...");
};

page.onLoadFinished = function logLoadFinished() {
    console.log("Loading finished:\n");
};
//////////////////////////////////////////////////////////////////

// Viewport used for rendering the screenshot.
page.viewportSize = {
    width: 1920,
    height: 1200
};

var sel = 'button1'; // DOM selector of the element to act on
var type = 'click';  // action (the original trailing comma here was a syntax error)

// Address of the switch and of the Cisco Device Manager page behind the
// start-up page.
var DEVICE_HOST = '10.44.39.252';
var DEVICE_MANAGER_URL = 'http://' + DEVICE_HOST + '/xhome.htm';

// Time given to the first page to settle before it is inspected, and the
// point at which the screenshot is taken and PhantomJS exits.
var SETTLE_DELAY_MS = 3000;
var EXIT_DELAY_MS = 20000;

// Cookies set before opening the Device Manager page.
// NOTE(review): 'value' for Cisco_DeviceManager is a placeholder carried over
// from the original script; confirm the real values against what the
// switch's own preflight.js sets.
var SESSION_COOKIES = [
    {name: 'Cisco_DeviceManager', value: 'value'},
    {name: 'SSLPreference', value: '2'},
    {name: 'gettingstarted', value: '1'}
];

// Returns the title of the document currently loaded in `page`.
// page.evaluate runs the function inside the web page, which is the only
// place `document` refers to the page's DOM; no jQuery is needed for this.
function readPageTitle() {
    return page.evaluate(function () {
        return document.title;
    });
}

// Registers each session cookie individually. phantom.addCookie expects one
// cookie per call, described by name/value/domain, and returns false when
// the cookie was rejected.
function addSessionCookies() {
    SESSION_COOKIES.forEach(function (cookie) {
        var added = phantom.addCookie({
            name: cookie.name,
            value: cookie.value,
            domain: DEVICE_HOST,
            path: '/'
        });
        if (!added) {
            console.log("Could not add cookie: " + cookie.name);
        }
    });
}

// Opens the Device Manager page and logs its title once it has loaded.
function openDeviceManager() {
    page.open(DEVICE_MANAGER_URL, function (status) {
        if (status !== "success") {
            console.log("Connect fail: " + DEVICE_MANAGER_URL);
            return; // the exit timer below still renders and terminates
        }
        // The open callback fires after the load finished, so the title can
        // be read directly from the page context.
        console.log("Your Document is Ready:" + readPageTitle() + "\n");
    });
}

// Entry point: open the switch's landing page, then move on to the
// Device Manager page with the session cookies in place.
page.open(url, function (status) {
    if (status !== "success") {
        console.log("Connect fail");
        phantom.exit();
        return;
    }

    setTimeout(function () {
        console.log("Title: " + readPageTitle() + "\n\n");
        addSessionCookies();
        openDeviceManager();
    }, SETTLE_DELAY_MS);

    // Scheduled unconditionally once the first page loaded, so the script
    // always terminates instead of depending on an external script load.
    setTimeout(function () {
        page.render("phantomspecs1.jpg");
        console.log("\nNow GTFO!");
        phantom.exit();
    }, EXIT_DELAY_MS);

    console.log("Wait for the Async..."); // prints before either timer fires
});
我需要phantomJS绕过启动页面,转到CiscoDeviceManager,在那里我可以呈现开关信息。但我对JavaScript、JQuery和AJAX的知识仍然缺乏(不是天生的程序员,而是在大学毕业后找到了一份编码工作,但我确实有一些基本概念)

如果你们中有人能帮我指出下一步的正确方向,我就可以完成任务并编写文档。毫无疑问,这对 PhantomJS 社区是有价值的。(我很自豪能成为其中的一员)

真诚地, 阿菲克·阿卜杜勒·哈米德,
马来西亚Cyberjaya

当您使用无头浏览器进行此项工作时,最合理的方法是像普通用户那样使用无头浏览器。不要做复杂的 cookie 操作等等,那只会给自己增加不必要的工作量

PhantomJS用于使用javascript自动化浏览器交互,您只需注入一些简单的javascript即可与UI交互

用户登录时只显示一次的表单应该很容易处理

用户登录后,只需尝试获取button元素,如果它存在,请单击它

// Click the start-up page's "Continue" button, but only when it is present:
// querySelector yields null when no matching input exists.
var btn1 = document.querySelector('input[name="button1"]');
if (btn1) {
    btn1.click();
}

另外,在您进行网页抓取工作时,有一个很棒的库,名为 CasperJS,您可以把它安装在 PhantomJS 之上,它可以抽象掉许多复杂性。

PhantomJS group version:很好的答案。另外,请务必使用持久化 cookie
--cookies-file=cookies.txt
,这样启动页面将只显示一次(如果我正确理解了您的问题)。 谢谢 Daniel Lane。现在我将重点研究按钮触发方法。我尝试实现您的代码建议,但控制台返回“按钮不存在”,我想我必须在 injectJs() 和 page.evaluate() 中实现 waitfor.js 或 $.ajax()。你有什么建议吗?或者我应该另外提一个问题吗? 是的,我考虑过 Casper.js,我确实安装了它,但现在我将使用 phantom。完成后可能会转到 Casper。 通常最好另外提一个问题。也就是说,如果你确定它是通过 ajax/动态 javascript 加载的,你可以在 setInterval 函数中反复执行 querySelector(轮询),直到找到元素。
// PhantomJS modules: `webpage` provides the headless browser page that the
// rest of the script drives; `fs` is loaded for file access.
var page = require('webpage').create();
var fs = require('fs');

console.log("\n:Welcome to my Crawler Scrapper:");

// Landing page of the switch's web interface.
var url = 'http://10.44.39.252/';

// HTTP Basic credentials, defined once. Previously the password given to
// page.settings ('----------') differed from the one encoded into the
// Authorization header, so the two authentication paths disagreed. The
// value the header was already sending is used for both.
// NOTE(review): credentials are hard-coded in the script; consider supplying
// them from outside (e.g. command-line arguments) instead.
var authUser = 'star';
var authPassword = 'xzsawq4321';

// Used by PhantomJS when the server answers with an HTTP auth challenge.
page.settings.userName = authUser;
page.settings.password = authPassword;
// Sent pre-emptively with every request made by the page.
page.customHeaders = {'Authorization': 'Basic ' + btoa(authUser + ':' + authPassword)};

page.settings.userAgent = 'PMG Web Crawler Bot/1.0';

// Trace every navigation the page attempts.
//   targetUrl     - URL the page is trying to navigate to
//   cause         - what triggered the navigation request
//   willNavigate  - whether the navigation will actually happen
//   fromMainFrame - whether the request came from the page's main frame
page.onNavigationRequested = function (targetUrl, cause, willNavigate, fromMainFrame) {
    var rule = "----------------------------------------------";
    var report = [
        "\n" + rule,
        "Navigation Request Information:\n",
        'Trying to navigate to: ' + targetUrl,
        'Caused by: ' + cause,
        'Will it actually navigate: ' + willNavigate,
        'Sent from the page\'s main frame: ' + fromMainFrame,
        rule + "\n"
    ];
    // One console.log call per entry, in order, as before.
    report.forEach(function (line) {
        console.log(line);
    });
};

// Report a resource that failed to load: its id and URL, followed by the
// error code and the human-readable description.
page.onResourceError = function (failure) {
    console.log("\nHold Up, We have Errors!");
    console.log("Resource Error Information: \n");

    var identity = 'Resoruce ErrorID:' + failure.id + '\nURL:' + failure.url;
    console.log(identity);

    var details = 'Resource Error Code: ' + failure.errorCode +
        '\nDescription: ' + failure.errorString;
    console.log(details);
};

// Relay console messages received from the page to the PhantomJS console,
// prefixed so they can be told apart from the script's own output.
page.onConsoleMessage = function (pageMessage) {
    var relayed = "The Browser Replied:" + pageMessage;
    console.log(relayed);
};

//////////////////////////////////////////////////////////////////
// Page-load lifecycle tracing: one line when a load begins and one
// when it completes.
page.onLoadStarted = function logLoadStarted() {
    console.log("Loadng Page...");
};

page.onLoadFinished = function logLoadFinished() {
    console.log("Loading finished:\n");
};
//////////////////////////////////////////////////////////////////

// Viewport used for rendering the screenshot.
page.viewportSize = {
    width: 1920,
    height: 1200
};

var sel = 'button1'; // DOM selector of the element to act on
var type = 'click';  // action (the original trailing comma here was a syntax error)

// Address of the switch and of the Cisco Device Manager page behind the
// start-up page.
var DEVICE_HOST = '10.44.39.252';
var DEVICE_MANAGER_URL = 'http://' + DEVICE_HOST + '/xhome.htm';

// Time given to the first page to settle before it is inspected, and the
// point at which the screenshot is taken and PhantomJS exits.
var SETTLE_DELAY_MS = 3000;
var EXIT_DELAY_MS = 20000;

// Cookies set before opening the Device Manager page.
// NOTE(review): 'value' for Cisco_DeviceManager is a placeholder carried over
// from the original script; confirm the real values against what the
// switch's own preflight.js sets.
var SESSION_COOKIES = [
    {name: 'Cisco_DeviceManager', value: 'value'},
    {name: 'SSLPreference', value: '2'},
    {name: 'gettingstarted', value: '1'}
];

// Returns the title of the document currently loaded in `page`.
// page.evaluate runs the function inside the web page, which is the only
// place `document` refers to the page's DOM; no jQuery is needed for this.
function readPageTitle() {
    return page.evaluate(function () {
        return document.title;
    });
}

// Registers each session cookie individually. phantom.addCookie expects one
// cookie per call, described by name/value/domain, and returns false when
// the cookie was rejected.
function addSessionCookies() {
    SESSION_COOKIES.forEach(function (cookie) {
        var added = phantom.addCookie({
            name: cookie.name,
            value: cookie.value,
            domain: DEVICE_HOST,
            path: '/'
        });
        if (!added) {
            console.log("Could not add cookie: " + cookie.name);
        }
    });
}

// Opens the Device Manager page and logs its title once it has loaded.
function openDeviceManager() {
    page.open(DEVICE_MANAGER_URL, function (status) {
        if (status !== "success") {
            console.log("Connect fail: " + DEVICE_MANAGER_URL);
            return; // the exit timer below still renders and terminates
        }
        // The open callback fires after the load finished, so the title can
        // be read directly from the page context.
        console.log("Your Document is Ready:" + readPageTitle() + "\n");
    });
}

// Entry point: open the switch's landing page, then move on to the
// Device Manager page with the session cookies in place.
page.open(url, function (status) {
    if (status !== "success") {
        console.log("Connect fail");
        phantom.exit();
        return;
    }

    setTimeout(function () {
        console.log("Title: " + readPageTitle() + "\n\n");
        addSessionCookies();
        openDeviceManager();
    }, SETTLE_DELAY_MS);

    // Scheduled unconditionally once the first page loaded, so the script
    // always terminates instead of depending on an external script load.
    setTimeout(function () {
        page.render("phantomspecs1.jpg");
        console.log("\nNow GTFO!");
        phantom.exit();
    }, EXIT_DELAY_MS);

    console.log("Wait for the Async..."); // prints before either timer fires
});
// Click the start-up page's "Continue" button, but only when it is present:
// querySelector yields null when no matching input exists.
var btn1 = document.querySelector('input[name="button1"]');
if (btn1) {
    btn1.click();
}