Javascript CasperJS-内存耗尽

Javascript CasperJS-内存耗尽,javascript,memory,memory-leaks,web-scraping,casperjs,Javascript,Memory,Memory Leaks,Web Scraping,Casperjs,当我通过命令行运行它时,它会持续一两个小时,然后命令行会显示“内存耗尽”。我不知道发生了什么事 另外,还有一些关于如何使这个项目更具可读性或可修改性的一般性建议,因为我将在一个月内完成这个项目 var fs = require('fs'); var currentPhysician = []; var physicianData = []; var permitMax = 99999; var alreadyParsed = []; var targetFile = "CMQphysicians

当我通过命令行运行这个脚本时,它会持续运行一两个小时,然后命令行显示“内存耗尽”(memory exhausted)。我不知道是什么原因导致的。

另外,如果能就如何让这个项目更易读、更易修改给出一些一般性建议,我将不胜感激,因为我将在一个月内完成这个项目。

// ---- Modules ----
var fs = require('fs');
var utils = require('utils');

// ---- Output ----
// CSV file the details stage appends one row per physician to.
var targetFile = "CMQphysicians.csv";

// ---- Permit counters ----
// Highest permit number that will be queried.
var permitMax = 99999;
// Permit currently being queried; incremented before each search,
// so -1 means "nothing queried yet".
var permitNumber = -1;
// Overwritten with the resume point when an existing CSV is found.
var firstLicense = 0;

// ---- Working buffers ----
// Fields of the row currently being assembled by the details stage.
var currentPhysician = [];
// Only its length is reported when the run completes.
var physicianData = [];
// Permit numbers read back from an existing CSV at start-up.
var alreadyParsed = [];

// Wall-clock start of the run, in milliseconds since the epoch.
var startTime = Date.now();

/**
 * Substring test added to every string.
 *
 * @param {string} needle - Text to look for.
 * @returns {boolean} true when `needle` occurs anywhere in this string.
 */
String.prototype.contains = function (needle) {
    var position = this.indexOf(needle);
    return position !== -1;
};


// Casper instance used by every listener and step in this script.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'info',
    pageSettings: {
        // Images are not downloaded.
        loadImages: false,
        // NPAPI plugins (Flash, Silverlight, ...) are not loaded.
        loadPlugins: false
    }
});


/**
 * Renders the module-level `permitNumber` as text, left-padded with '0'
 * to a width of five characters. Text that is already five or more
 * characters long is returned without padding.
 *
 * @returns {string} The padded permit number.
 */
function getPermitNumberString() {
    var digits = permitNumber.toString();
    var missing = Math.max(0, 5 - digits.length);

    // Joining an array of (missing + 1) empty slots with '0' yields
    // exactly `missing` zeros.
    return new Array(missing + 1).join('0') + digits;
}

/**
 * Collects the `innerText` of every cell matched by the details-table
 * selector, in document order. It reads the global `document`, and the
 * script hands it to `casper.evaluate`, so it references nothing else
 * from this file.
 *
 * @returns {string[]} One entry per matched <td>.
 */
function getDetailsData() {
    var cells = document.querySelectorAll('#content-html > table.griddetails > tbody > tr > td');
    var texts = [];
    var idx;

    for (idx = 0; idx < cells.length; idx++) {
        texts.push(cells[idx].innerText);
    }

    return texts;
}

/**
 * Counts the links found in the first cell of the second row of the
 * #GViewList table. It reads the global `document`, and the script hands
 * it to `casper.evaluate`.
 *
 * NOTE(review): the selector only inspects row 2, column 1; confirm that
 * pages listing several physicians really produce a count above 1.
 *
 * @returns {number} Number of matching <a> elements.
 */
function getPhysicianCount() {
    var linkSelector = "#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a";
    var links = document.querySelectorAll(linkSelector);

    return links.length;
}

/**
 * Reports failed resource loads.
 *
 * Errors for URLs containing 'google' are not echoed. When the error
 * string contains 'undefined' the run is aborted with die(). The previous
 * empty `while` loop never ended once entered, so the process froze
 * instead of stopping.
 *
 * @param {Object} resourceError - Error descriptor; the fields read here
 *     are `url`, `errorCode`, `errorString` and `id`.
 */
casper.on("resource.error", function (resourceError) {
    if (!resourceError.url.contains('google')) {
        this.echo("Resource error: " + "Error code: " + resourceError.errorCode + " ErrorString: " + resourceError.errorString + " url: " + resourceError.url + " id: " + resourceError.id, "ERROR");
    }

    if (resourceError.errorString.contains('undefined')) {
        this.die("Aborting: resource error string contains 'undefined' (url: " + resourceError.url + ")", 1);
    }
});

// Placeholder: nothing is reported when a page load starts.
casper.on('load.started', function () {});

// Placeholder: navigation requests are observed but not reported.
casper.on('navigation.requested', function (url, navigationType, navigationLocked, isMainFrame) {});

// Echoes every 'remote.message' payload behind a fixed marker string.
casper.on('remote.message', function (message) {
    var marker = 'from within remote page DOM';
    this.echo(marker + message);
});

// Opens the initial page; the 'load.finished' listener takes over from
// there and decides where to navigate next.
casper.start('https://www.google.ca/?gws_rd=ssl', function onStartPageLoaded() {
    this.echo('Starting!');
});

/**
 * Navigation state machine, run after every finished page load.
 *
 * The part of the current URL before '.aspx' selects the stage:
 *   - start page (or any URL without '.aspx'): read the resume point from
 *     an existing CSV, or create the CSV with its header, then open the
 *     directory index page;
 *   - index:   advance `permitNumber` and submit it in the search form;
 *   - list:    open the single result, or search again when there is none;
 *   - details: append one CSV row for the physician, then start a new search;
 *   - anything else: go back one page.
 *
 * Changes from the previous revision:
 *   - resuming sets `permitNumber` to the highest stored permit, because the
 *     index stage increments before searching; it used to be set one higher,
 *     so the permit right after the stored maximum was never queried;
 *   - a CSV holding only the header no longer produces -Infinity;
 *   - the CSV stream is closed even if parsing throws;
 *   - `stream`, `line`, `a`, `tableData` and `i` are no longer implicit globals;
 *   - the row is built in a local array, so a failure part-way through can
 *     no longer leak stale fields into the next row through the global
 *     `currentPhysician` (which this handler now leaves untouched);
 *   - the unexpected-count branches print the permit number (it was being
 *     concatenated outside the echo call) and abort with die() instead of
 *     spinning forever in `while(true){}`;
 *   - a header without '(' or without ',' no longer throws;
 *   - unused locals (date/hours/minutes and the ETA figures) are removed.
 *
 * NOTE(review): every pass through this handler queues further steps
 * (waitForSelector / thenClick / thenOpen). Whether the CasperJS version in
 * use retains executed steps, and whether that contributes to the reported
 * memory exhaustion, should be confirmed.
 *
 * @param {string} status - Load status supplied with the event (unused).
 */
casper.on('load.finished', function (status) {
    var START_URL = 'https://www.google.ca/?gws_rd=ssl';
    // Element every stage waits for before it touches the page.
    var READY_SELECTOR = '#___gcse_0 > div > form > table.gsc-search-box > tbody > tr > td.gsc-search-button > input';
    var HEADER_SELECTOR = '#content-html > table.griddetails > tbody > tr:nth-child(1) > th';

    var currentUrl = this.getCurrentUrl();
    // indexOf() is -1 when '.aspx' is absent, which makes the prefix empty.
    var urlPrefix = currentUrl.substring(0, currentUrl.indexOf('.aspx'));

    var stream;
    var line;
    var isHeaderLine;
    var parsedPermit;
    var highestPermit;

    if (urlPrefix.length === 0) {
        casper.echo('undefined');
        urlPrefix = START_URL;
    }

    switch (urlPrefix.toUpperCase()) {

    case START_URL.toUpperCase():
        casper.echo('on google');

        if (fs.exists(targetFile)) {
            // -1 mirrors the fresh-start value of permitNumber, so a CSV
            // with no data rows behaves like a first run.
            highestPermit = -1;
            stream = fs.open(targetFile, 'r');
            try {
                isHeaderLine = true;
                line = stream.readLine();
                while (line) {
                    if (!isHeaderLine) {
                        parsedPermit = Number(line.substring(0, line.indexOf(',')));
                        alreadyParsed.push(parsedPermit);
                        // NaN never compares greater, so unparsable rows are ignored.
                        if (parsedPermit > highestPermit) {
                            highestPermit = parsedPermit;
                        }
                    }
                    isHeaderLine = false;
                    line = stream.readLine();
                }
            } finally {
                stream.close();
            }

            // The index stage increments before searching, so the next
            // permit queried is highestPermit + 1.
            permitNumber = highestPermit;
            firstLicense = highestPermit + 1;
            casper.echo(firstLicense);
        } else {
            fs.write(targetFile, "\uFEFF" + 'Permit Number,Last Name,First Name,Gender,Permit,Status,Specialty,Activity,Authorization,Address,Phone\n', 'a');
        }

        casper.thenOpen('http://www.cmq.org/bottin/index.aspx?lang=en&a=1');
        break;

    case 'http://www.cmq.org/bottin/index'.toUpperCase():
        casper.waitForSelector(READY_SELECTOR, function () {
            casper.echo('index stage');
            permitNumber++;

            if (permitNumber > permitMax) {
                // No further step is queued, so the run winds down here.
                casper.echo('Permit number maxed out');
                return;
            }

            casper.echo('going to list');
            casper.sendKeys('#txbNoPermis', getPermitNumberString());
            casper.echo('sent keys, now clicking');
            casper.thenClick('#btSubmit');
            casper.echo('after the click');
        });
        break;

    case 'http://www.cmq.org/bottin/list'.toUpperCase():
        casper.waitForSelector(READY_SELECTOR, function () {
            var matchCount;

            casper.echo('list stage');
            // Three cases:
            //   no result    -> search again
            //   one result   -> open its details page
            //   anything else -> abort, the script cannot handle it
            matchCount = casper.evaluate(getPhysicianCount);

            if (matchCount === 0) {
                casper.echo('No physicians for license ' + getPermitNumberString());
                casper.echo('going to index');
                casper.thenClick('#btSubmit');
            } else if (matchCount === 1) {
                casper.echo('Physician exists for license ' + getPermitNumberString());
                casper.echo('going to details');
                casper.thenClick('#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a');
            } else if (matchCount > 1) {
                casper.die('a > 1 at ' + getPermitNumberString(), 1);
            } else {
                // Reached for negative or non-numeric counts (e.g. null).
                casper.die('negative a at ' + getPermitNumberString(), 1);
            }
        });
        break;

    case 'http://www.cmq.org/bottin/details'.toUpperCase():
        casper.waitForSelector(READY_SELECTOR, function () {
            var header;
            var parenAt;
            var nameParts;
            var tableData;
            var record;
            var i;

            casper.echo('details stage');

            // The header holds "<last>, <first> (…)"; keep what precedes '('.
            header = casper.getHTML(HEADER_SELECTOR);
            parenAt = header.indexOf('(');
            nameParts = (parenAt === -1 ? header : header.substring(0, parenAt)).trim().split(',');

            tableData = casper.evaluate(getDetailsData);

            record = [
                tableData[4],
                nameParts[0].trim(),
                (nameParts[1] || '').trim()
            ];
            // Every even-indexed cell from 2 onwards, except index 4 which
            // already leads the row.
            for (i = 2; i < tableData.length; i += 2) {
                if (i !== 4) {
                    record.push(tableData[i]);
                }
            }

            // Commas and newlines would break the CSV layout.
            for (i = 0; i < record.length; i++) {
                record[i] = record[i].replace(/,/g, ';').replace(/\n/g, ';');
            }

            casper.echo('writing to file!');
            fs.write(targetFile, record.join(',') + '\n', 'a');

            casper.echo(casper.exists('#btNewsearch'));
            casper.echo('going to index');
            casper.thenClick('#btNewsearch');
        });
        break;

    default:
        casper.echo("Wrong URL!");
        casper.back();
        break;
    }
});

/**
 * Runs the queued steps and, once they are all done, prints a closing
 * message followed by the length of `physicianData`.
 *
 * exit() is called explicitly: CasperJS does not terminate on its own when
 * a completion callback is passed to run(), so without it the process
 * stays alive after the last step.
 *
 * NOTE(review): nothing in the visible script adds to `physicianData`, so
 * the printed length looks like it is always 0 — confirm whether it is
 * still needed.
 */
casper.run(function () {
    casper.echo('ending!');

    casper.echo(physicianData.length);

    casper.exit();
});
var fs=require('fs');
var currentMedicine=[];
var Physician数据=[];
var permitMax=99999;
var alreadyParsed=[];
var targetFile=“cmqphysicans.csv”;
var startTime=new Date().getTime();
变量permitNumber=-1;
var firstLicense=0;
var utils=require('utils');
String.prototype.contains=函数{
return(this.indexOf(s)!=-1);
}
var casper=require('casper')。创建({
没错,
日志级别:“信息”,
页面设置:{
loadImages:false,//不加载图像
loadPlugins:false//不加载NPAPI插件(Flash、Silverlight等)
}
});
函数getPermitNumber字符串(){
var pn=permitNumber.toString();
var l=pn.长度;
var i;
var前导零=“”;
对于(i=0;i<(5-pn.长度);i++){
leadingZeros=leadingZeros+'0';
}
返回引线零+pn;
}
函数getDetailsData(){
var details=document.querySelectorAll('content html>table.griddetails>tbody>tr>td');
返回Array.prototype.map.call(详细信息,函数(e){
返回e.innerText;
});
}
函数getPhysicianCount(){
return document.queryselectoral(#GViewList>tbody>tr:nth child(2)>td:nth child(1)>a”).length;
}
casper.on(“resource.error”,函数(resourceError){
如果(!resourceError.url.contains('google')){
echo(“资源错误:”+“错误代码:”+resourceError.errorCode+“错误字符串:”+resourceError.ErrorString+“url:”+resourceError.url+“id:”+resourceError.id,“错误”);
}
而(resourceError.errorString.contains('undefined')){
});
casper.on('load.started',函数(){
//echo('load started');
});
casper.on('navigation.requested',函数(url、navigationType、navigationLocked、isMainFrame){
//echo(“请求导航”);
//casper.echo(导航类型);
});
casper.on('remote.message',函数(msg){
this.echo('来自远程页面DOM'+msg);
});
卡斯珀,开始https://www.google.ca/?gws_rd=ssl,函数(){//加载初始页。
echo('Starting!');
});
casper.on('load.finished',函数(状态){
//echo('load finished');
变量日期=新日期();
var hours=date.getHours();
var minutes=date.getMinutes();
//echo(hours.toString()+':'+minutes.toString()+''+this.getCurrentUrl().toUpperCase());
var urlPrefix=this.getCurrentUrl().substring(0,this.getCurrentUrl().indexOf('.aspx'));
如果(urlPrefix.length==0){
casper.echo(“未定义”);
URL前缀=https://www.google.ca/?gws_rd=ssl“.toUpperCase();
}
开关(urlPrefix.toUpperCase()){
案例https://www.google.ca/?gws_rd=ssl'.toUpperCase():
echo(“在谷歌上”);
如果(fs.exists('cmqphysicans.csv')){
stream=fs.open('cmqphysicans.csv','r');
line=stream.readLine();
var i=0;
while(行){
如果(i>0){
alreadyParsed.push(数字(行.子字符串(0,行.索引of(','))));
}
line=stream.readLine();
i++;
}
stream.close();
permitNumber=Math.max.apply(null,alreadyParsed)+1;
firstLicense=许可证编号;
casper.echo(许可号码);
}否则{
fs.write(targetFile,“\uFEFF”+”许可证编号、姓氏、名字、性别、许可证、状态、专业、活动、授权、地址、电话\n、'a');
}
卡斯珀,然后打开http://www.cmq.org/bottin/index.aspx?lang=en&a=1');
打破
案例http://www.cmq.org/bottin/index'.toUpperCase():
casper.waitForSelector('#uuu gcse_0>div>form>table.gsc-search-box>tbody>tr>td.gsc-search-button>input',函数(){
var finishedSoFar=permitNumber-firstLicense;
var timeSoFar=new Date().getTime()-startTime;
var licensesToDo=许可证最大值-许可证编号;
var msPerLicense=飞行时间/完成飞行时间;
var minutesToGo=(licensesToDo*msPerLicense)/1000/60;
//casper.echo(licensesToDo+‘许可证可以使用’++msPerLicense.toString()++‘ms per license’++minutesToGo.toString()++‘剩余分钟’);
echo(‘索引阶段’);
permitNumber++;
如果(permitNumber>permitMax){
casper.echo(‘许可证号最大化’);
}否则{
var permitNumberString=getPermitNumberString();
echo(‘将要上市’);
casper.sendKeys('#txbNoPermis',permitNumber字符串);
//等一下(100);
echo('sent key,now clicking');
casper.然后单击('btSubmit');
echo('after the click');
}
});
打破
案例http://www.cmq.org/bottin/list'.toUpperCase():
casper.waitForSelector('#uuu gcse_0>div>form>table.gsc-search-box>tbody>tr>td.gsc-search-button>input',函数(){
echo(“列表阶段”);
//三个案例:
//没有结果,一个结果,很多结果
//无结果:返回(00000)
//一个结果:前进(82365)
//许多结果:崩溃(???)
a=casper.evaluate(getPhysicianCount);
如果(a==0){
echo('无医师执照'+getPermitNumberString());
echo(‘进入索引’);
casper.然后单击('btSubmit');
//等一下(1000);
}如果(a==1),则为else{
echo('医师存在于许可证'+GetPermitNumber字符串());
echo(‘进入细节’);
casper.然后单击(“#GViewList>tbody>tr:nth child(2)>td:nth child(1)>a”);