Java Hadoop YARN map 任务的物理内存和虚拟内存不足_Java_Hadoop_Selenium Webdriver_Yarn_Ghostdriver

Java Hadoop YARN map 任务的物理内存和虚拟内存不足

java hadoop selenium-webdriver

Java Hadoop纱线映射任务的物理和虚拟内存不足,java,hadoop,selenium-webdriver,yarn,ghostdriver,Java,Hadoop,Selenium Webdriver,Yarn,Ghostdriver,我有下面的方法，我在多线程执行中从map任务运行，但是在独立的mod e中可以很好地工作，但是当我在Hadoop Thread中运行这个方法时，它会耗尽1GB的物理内存，并且虚拟内存也会迅速增加我需要知道，从编程的角度看，我是否做错了什么，我认为我正在尽快关闭所有打开的流，因此我认为没有理由发生内存泄漏。请告知谢谢公共静态无效管理器当前url（字符串url）{ logger.trace（“输入当前URL的方法管理器”）； InputStream=null； InputStream=null

我有下面的方法，它在 map 任务中以多线程方式运行。在独立（standalone）模式下它可以很好地工作，但是当我在 Hadoop YARN 上运行这个方法时，它会耗尽 1GB 的物理内存，并且虚拟内存也会迅速增加。

我想知道，从编程的角度看，我是否做错了什么。我认为我已经尽快关闭了所有打开的流，因此应该没有理由发生内存泄漏。请指教。

谢谢

公共静态无效管理器当前url（字符串url）{

logger.trace（“输入当前URL的方法管理器”）；
InputStream=null；
InputStream=null；
ByteArrayOutputStream out=null；
WebDriver=null；
试一试{
if（StringUtils.isNotBlank（url））{
setJavascriptEnabled（true）；//实际上不需要：JS
//默认情况下启用
caps.setCapability(
PhantomJSDriverService.PHANTOMJS_可执行路径_属性，
“/usr/local/bin/phantomjs”）；
//启动驱动程序（将负责管理和拥有phantomjs
//过程）
驱动程序=新的幻影驱动程序（caps）；
获取（url）；
字符串htmlContent=driver.getPageSource（）；
if（htmlContent！=null）{
is=newbytearrayinputstream（htmlContent.getBytes（））；
ByteArrayDocumentSource ByteArrayDocumentSource=新建ByteArrayDocumentSource(
是，url，“文本/html”）；
Any23 runner=新的Any23（）；
setHTTPUserAgent（“测试用户代理”）；
out=新的ByteArrayOutputStream（）；
TripleHandler处理程序=新的NTriplesWriter（输出）；
试一试{
extract（byteArrayDocumentSource，handler）；
}捕获（提取异常e）{
}最后{
if（驱动程序！=null）{
driver.quit（）；
//driver.close（）；
}
试一试{
handler.close（）；
}捕获（三重句柄异常）{
}
如果（is！=null）{
试一试{
is.close（）；
}捕获（IOE异常）{
}
}
}
if（out！=null）{
stream=newbytearrayinputstream（out.toByteArray（））；
迭代器it=新的DeltaParser（流）；
如果（it！=null）{
SolrCallBackbornXParser回调=新建SolrCallBackbornXParser(
网址）；
callback.startStory（）；
while（it.hasNext（））{
节点[]abc=it.next（）；
processStory（abc）；
}
callback.endStory（）；
}
}
}
}
}捕获（IOE异常）{
返回；
}
最后{
if（流！=null）{
试一试{
stream.close（）；
}捕获（IOE异常）{
}
}
if（out！=null）{
试一试{
out.close（）；
}捕获（IOE异常）{
}
}
}
trace（“退出方法管理器当前URL”）；

}

logger.trace("Entering the method manageTheCurrentURL ");

// Overview: render the page at `url` with PhantomJS, run Any23 over the
// rendered HTML to produce N-Triples, then feed every parsed statement to a
// SolrCallbackForNXParser.
//
// The PhantomJS driver owns an external process. It is quit in a `finally`
// immediately after the page source has been read, so the process is
// released on every path (exception, null page source, normal completion)
// and is not kept alive while the extraction runs.
InputStream stream = null;
InputStream is = null;
ByteArrayOutputStream out = null;
WebDriver driver = null;
try {

    if (StringUtils.isNotBlank(url)) {

        String htmlContent;
        try {
            // NOTE(review): `caps` is declared outside this block and is
            // mutated here; if it is shared between the worker threads this
            // is a data race -- confirm, or build the capabilities locally.
            caps.setJavascriptEnabled(true); // not really needed: JS
                                             // enabled by default
            caps.setCapability(
                    PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY,
                    "/usr/local/bin/phantomjs");

            // Launch driver (will take care and ownership of the phantomjs
            // process)
            driver = new PhantomJSDriver(caps);
            driver.get(url);
            htmlContent = driver.getPageSource();
        } finally {
            // Always terminate the phantomjs process, even when loading the
            // page failed; a leaked process keeps its memory until the task
            // JVM exits.
            if (driver != null) {
                driver.quit();
            }
        }

        if (htmlContent != null) {

            // Explicit charset: the platform default may differ between
            // cluster nodes.
            is = new ByteArrayInputStream(
                    htmlContent.getBytes(java.nio.charset.StandardCharsets.UTF_8));

            ByteArrayDocumentSource byteArrayDocumentSource = new ByteArrayDocumentSource(
                    is, url, "text/html");

            Any23 runner = new Any23();
            runner.setHTTPUserAgent("test-user-agent");

            out = new ByteArrayOutputStream();
            TripleHandler handler = new NTriplesWriter(out);

            try {
                runner.extract(byteArrayDocumentSource, handler);
            } catch (ExtractionException e) {
                // Best effort: keep whatever triples were written before the
                // failure, but do not hide the failure.
                logger.warn("Extraction failed for " + url, e);
            } finally {
                try {
                    handler.close();
                } catch (TripleHandlerException e) {
                    logger.warn("Could not close the triple handler for " + url, e);
                }
            }

            stream = new ByteArrayInputStream(out.toByteArray());
            Iterator<Node[]> it = new DeltaParser(stream);

            SolrCallbackForNXParser callback = new SolrCallbackForNXParser(
                    url);
            callback.startStory();

            while (it.hasNext()) {
                Node[] abc = it.next();
                callback.processStory(abc);
            }

            callback.endStory();
        }

    }

} catch (IOException e) {
    logger.error("I/O failure while processing " + url, e);
    return;
} finally {

    // All three are in-memory streams; closing them is cheap and is kept
    // only so the code stays correct if an implementation is swapped later.
    if (is != null) {
        try {
            is.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close
        }
    }
    if (stream != null) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close
        }
    }
    if (out != null) {
        try {
            out.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close
        }
    }
}

logger.trace("Exiting the method manageTheCurrentURL ");