Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/multithreading/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 使用多线程代码防止DOS攻击_Java_Multithreading_Hadoop_Networking_Tcp - Fatal编程技术网

Java 使用多线程代码防止DOS攻击

Java 使用多线程代码防止DOS攻击,java,multithreading,hadoop,networking,tcp,Java,Multithreading,Hadoop,Networking,Tcp,让我在这里概述一下我的问题: 每台服务器约有4000台服务器和数百万个URL。我的代码需要点击每个URL,并将响应代码与URL一起写入HDFS文件系统 这里也添加了一些内容: 我在这里使用的是400线程的生产者-消费者模型。该代码最近对少数web服务器造成了DOS攻击,我很难找出问题所在: 主要类别: public void readURLS(final Path inputPath, final Path outputPath) { LOG.info("Looking fo

让我在这里概述一下我的问题:

大约有4000台服务器,每台服务器上有数百万个URL。我的代码需要访问每个URL,并将响应代码连同URL一起写入HDFS文件系统。

这里也添加了一些内容:

我在这里使用的是400线程的生产者-消费者模型。该代码最近对少数web服务器造成了DOS攻击,我很难找出问题所在:

主类:

/**
 * Runs one URL validation job end to end and blocks until it has finished.
 *
 * <p>The files found under {@code inputPath} are handed to download workers that feed a
 * bounded queue (capacity {@code queueSize}); a single {@code UrlConsumerWriter}, constructed
 * with that queue and {@code outputPath}, runs on the writer pool. Once every download has
 * completed normally the writer is told so via {@code notifyProducersDone}.
 *
 * @param inputPath  directory whose files are listed and passed to the download workers
 * @param outputPath location passed to the {@code UrlConsumerWriter}
 */
public void readURLS(final Path inputPath, final Path outputPath) {
    LOG.info("Looking for files to download, queue size: {}, DOWNLOAD_THREADS: {}", queueSize, producerThreads);
    final List<Path> inputFiles = HdfsUtils.listDirectory(inputPath, hadoopConf);
    final BlockingQueue<String> resultQueue = new LinkedBlockingQueue<>(queueSize);
    final UrlConsumerWriter writer = new UrlConsumerWriter(resultQueue, outputPath, hadoopConf);

    LOG.info("Starting download of {} files from: '{}'", inputFiles.size(), inputPath);
    final ExecutorService writerPool = DownloadUtils.createWriterPool();

    // Producer side: all downloads, followed by the "no more input" signal to the writer.
    final CompletableFuture<Void> allDownloads = downloadFilesToQueue(inputFiles, resultQueue);
    final CompletableFuture<Void> producer = allDownloads.thenRun(writer::notifyProducersDone);

    // Consumer side: if the writer dies, cancel the producer stage so join() below does not
    // wait on it. NOTE(review): this cancels the dependent stage only; whether the download
    // workers themselves stop is not visible from here.
    final CompletableFuture<Void> writerTask = CompletableFuture.runAsync(writer, writerPool);
    final CompletableFuture<Void> consumer = writerTask.whenComplete((ignored, failure) -> {
        if (failure == null) {
            return;
        }
        LOG.error("Consumer Write worker failed!", failure);
        producer.cancel(true);
    });

    // The writer task is already submitted; shutdown() only prevents further submissions.
    writerPool.shutdown();
    producer.join();
    consumer.join();
    LOG.info("Url Validation Job Complete!!!");
}

/**
 * Starts one {@code UrlDownloadWorker} per input file on a download pool of
 * {@code producerThreads} threads and returns a future covering all of them.
 *
 * <p>If any worker fails, the returned future is cancelled, the pool is shut down with
 * {@code shutdownNow()} and every other worker future is cancelled, so that no further
 * requests are issued on behalf of a job that has already failed.
 *
 * @param files         input files, one worker is created per file
 * @param downloadQueue queue handed to every worker
 * @return a future that completes when all workers have finished, or is cancelled as soon
 *         as one of them fails
 */
private CompletableFuture<Void> downloadFilesToQueue(
        final List<Path> files,
        final BlockingQueue<String> downloadQueue
) {
    final ExecutorService pool = DownloadUtils.createDownloadPool(producerThreads);

    final List<CompletableFuture<Void>> workers = files.stream()
            .map(file -> new UrlDownloadWorker(clock, file, hadoopConf, downloadQueue,
                    utils, (validatorImpl.emptyTable())))
            .map(worker -> CompletableFuture.runAsync(worker, pool))
            .collect(Collectors.toList());

    // All tasks are submitted; shutdown() lets them run but rejects new submissions.
    pool.shutdown();

    final CompletableFuture<Void> allDownloads = CompletableFuture.allOf(workers.toArray(new CompletableFuture[0]));

    // When one worker fails, stop all the others immediately.
    for (final CompletableFuture<Void> worker : workers) {
        worker.whenComplete((v, err) -> {
            // Workers cancelled below complete with CancellationException; that is a
            // consequence of the first failure, not a new one, so it must not re-trigger.
            if (err == null || err instanceof java.util.concurrent.CancellationException) {
                return;
            }
            LOG.error("Download worker failed!", err);

            // Cancel the aggregate first so callers observe a cancellation, as before.
            allDownloads.cancel(true);

            // CompletableFuture.cancel() never interrupts a running task, so cancelling the
            // aggregate alone leaves every other worker running. shutdownNow() drops the
            // queued workers and interrupts the running ones (best effort: it only helps if
            // the worker reacts to interruption).
            pool.shutdownNow();

            // Tasks removed from the pool queue would otherwise never complete.
            for (final CompletableFuture<Void> other : workers) {
                other.cancel(true);
            }
        });
    }

    return allDownloads;
}
UrlValidationUtils类:

/**
 * Probes URLs over HTTP and classifies the outcome as a {@code StatusCode}.
 *
 * <p>Redirects (301/302/303) are followed manually, at most {@code MAX_REDIRECT} times.
 * Hosts whose request fails are recorded in {@code blockedHosts}; the lookup that would
 * short-circuit further requests to such hosts is currently commented out, so within this
 * class the map is only written, never read.
 */
public final class UrlValidationUtils {
    private static final String WEBSITENOTCHECK = "uncheck.org";
    private static final int MAX_REDIRECT = 4;
    private static final int CONNECT_TIMEOUT_MS = 3000;
    // Without a read timeout getResponseCode() can block forever on a server that accepts
    // the connection but never answers, pinning the calling thread and the socket.
    private static final int READ_TIMEOUT_MS = 10000;

    private final Map<String, StatusCode> blockedHosts = new ConcurrentHashMap<>();

    /**
     * Validates a single URL, following up to {@code MAX_REDIRECT} redirects.
     *
     * @param url absolute URL to probe
     * @return the status derived from the response code or from the I/O failure
     * @throws IOException if {@code url} itself is malformed or the connection object
     *                     cannot be created
     */
    public StatusCode validateURL(String url) throws IOException {
        return validate(url, MAX_REDIRECT);
    }

    /**
     * Issues one request to {@code url} and maps the result to a status.
     *
     * @param url         absolute URL to probe
     * @param maxRedirect number of redirects that may still be followed from here
     */
    private StatusCode validate(String url, int maxRedirect) throws IOException {
        final URL urlValue = new URL(url);
        final String host = urlValue.getHost();

        if (url.contains(WEBSITENOTCHECK)) {
            blockedHosts.put(host, StatusCode.SUCCESS);
        }
        // NOTE(review): the cache lookup below is disabled, so every URL is still requested
        // even when its host has already been recorded.
        //first check if the host is already marked as invalid
//        if (blockedHosts.containsKey(urlValue.getHost())) {
//            return blockedHosts.get(urlValue.getHost());
//        }
        final HttpURLConnection con = (HttpURLConnection) urlValue.openConnection();
        StatusCode statusCode;

        try {
            con.setInstanceFollowRedirects(false);
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);
            con.connect();
            final int resCode = con.getResponseCode();

            LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);
            if (resCode == HttpURLConnection.HTTP_OK) {
                statusCode = StatusCode.SUCCESS;
            } else if (resCode == HttpURLConnection.HTTP_SEE_OTHER || resCode == HttpURLConnection.HTTP_MOVED_PERM
                    || resCode == HttpURLConnection.HTTP_MOVED_TEMP) {
                final String location = con.getHeaderField("Location");
                if (location == null || location.isEmpty()) {
                    // A redirect without a target cannot be followed; previously this
                    // dereferenced null and escaped as a NullPointerException.
                    statusCode = markHost(host, StatusCode.FAIL);
                } else {
                    // Resolve against the current URL so that the port is kept and
                    // relative ("x.html") or scheme-relative ("//host/x") targets work.
                    final String target = new URL(urlValue, location).toString();
                    statusCode = validateRedirect(target, maxRedirect - 1, con);
                }
            } else {
                statusCode = markHost(host, StatusCode.FAIL);
            }
        } catch (UnknownHostException e) {
            statusCode = markHost(host, StatusCode.UNKOWNHOST);
        } catch (ConnectException e) {
            statusCode = markHost(host, StatusCode.CONNECTION_ISSUE);
        } catch (IOException e) {
            //if an IOException is caught possible reason is SOCKETTIMEOUT
            statusCode = markHost(host, StatusCode.SOCKETTIMEOUT);
        } finally {
            // Always release the connection, including when a runtime exception escapes.
            con.disconnect();
        }
        LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);

        return statusCode;
    }

    /**
     * Follows a redirect if the remaining budget allows it.
     *
     * @param location      absolute redirect target
     * @param redirectCount remaining redirect budget; negative means exhausted
     * @param connection    connection that produced the redirect, released before following
     * @return the status of the redirect target, or {@code StatusCode.FAIL} when the budget
     *         is exhausted
     */
    private StatusCode validateRedirect(String location, int redirectCount, HttpURLConnection connection)
            throws IOException {
        if (redirectCount >= 0) {
            connection.disconnect();
            return validate(location, redirectCount);
        }
        return StatusCode.FAIL;

    }

    /** Records {@code status} for {@code host} and returns it, for use in assignments. */
    private StatusCode markHost(String host, StatusCode status) {
        blockedHosts.put(host, status);
        return status;
    }

}
public final class UrlValidationUtils {
    private static final String WEBSITENOTCHECK = "uncheck.org";
    private final Map<String, StatusCode> blockedHosts = new ConcurrentHashMap<>();
    private static final int MAX_REDIRECT = 4;
    public StatusCode validateURL(String url) throws IOException {
        return validate(url, MAX_REDIRECT);
    }
    private StatusCode validate(String url, int maxRedirect) throws IOException {
        URL urlValue = new URL(url);
        HttpURLConnection con;
        if (url.contains(WEBSITENOTCHECK)) {
            blockedHosts.put(urlValue.getHost(), StatusCode.SUCCESS);
        }
        //first check if the host is already marked as invalid
//        if (blockedHosts.containsKey(urlValue.getHost())) {
//            return blockedHosts.get(urlValue.getHost());
//        }
        StatusCode statusCode;
        con = (HttpURLConnection) urlValue.openConnection();
        try {
            int resCode;
            con.setInstanceFollowRedirects(false);
            con.setConnectTimeout(3000); //set timeout to 3 seconds
            con.connect();
            resCode = con.getResponseCode();
            LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);
            if (resCode == HttpURLConnection.HTTP_OK) {
                statusCode = StatusCode.SUCCESS;
            } else if (resCode == HttpURLConnection.HTTP_SEE_OTHER || resCode == HttpURLConnection.HTTP_MOVED_PERM
                    || resCode == HttpURLConnection.HTTP_MOVED_TEMP) {
                String location = con.getHeaderField("Location");
                if (location.startsWith("/")) {
                    location = urlValue.getProtocol() + "://" + urlValue.getHost() + location;
                }
                statusCode = validateRedirect(location, maxRedirect - 1, con);
            } else {
                blockedHosts.put(urlValue.getHost(), StatusCode.FAIL);
                statusCode = StatusCode.FAIL;
            }
        } catch (UnknownHostException e) {
            blockedHosts.put(urlValue.getHost(), StatusCode.UNKOWNHOST);
            statusCode = StatusCode.UNKOWNHOST;
        } catch (ConnectException e) {
            blockedHosts.put(urlValue.getHost(), StatusCode.CONNECTION_ISSUE);
            statusCode = StatusCode.CONNECTION_ISSUE;
        } catch (IOException e) {
            //if an IOException is caught possible reason is SOCKETTIMEOUT
            blockedHosts.put(urlValue.getHost(), StatusCode.SOCKETTIMEOUT);
            statusCode = StatusCode.SOCKETTIMEOUT;
        }
        con.disconnect();
        LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);
        return statusCode;
    }
    private StatusCode validateRedirect(String location, int redirectCount, HttpURLConnection connection)
            throws IOException {
        if (redirectCount >= 0) {
            connection.disconnect();
            return validate(location, redirectCount);
        }
        return StatusCode.FAIL;
    }
}

为了避免服务器过载,我建议在点击一批URL之前等待几毫秒。例如,在点击N个URL后,您可以等待20毫秒,然后再点击下一批N个,依此类推。批处理的大小(N)取决于您所知道的服务器每秒能够处理的请求数。在性能方面,您是否与他们签订了服务级别协议(SLA)?

如果是这样,完成这项工作需要几天/几周的时间,不是吗?这取决于服务器每秒可以处理多少请求。你有他们的SLA吗?如果他们每秒可以处理1000个请求,那么就使用1000个批次。另一个想法:你有数百万个独特的URL吗?你能减少要点击的URL的数量吗?
/**
 * Probes URLs over HTTP and classifies the outcome as a {@code StatusCode}.
 *
 * <p>Redirects (301/302/303) are followed manually, at most {@code MAX_REDIRECT} times.
 * Hosts whose request fails are recorded in {@code blockedHosts}; the lookup that would
 * short-circuit further requests to such hosts is currently commented out, so within this
 * class the map is only written, never read.
 */
public final class UrlValidationUtils {
    private static final String WEBSITENOTCHECK = "uncheck.org";
    private static final int MAX_REDIRECT = 4;
    private static final int CONNECT_TIMEOUT_MS = 3000;
    // Without a read timeout getResponseCode() can block forever on a server that accepts
    // the connection but never answers, pinning the calling thread and the socket.
    private static final int READ_TIMEOUT_MS = 10000;

    private final Map<String, StatusCode> blockedHosts = new ConcurrentHashMap<>();

    /**
     * Validates a single URL, following up to {@code MAX_REDIRECT} redirects.
     *
     * @param url absolute URL to probe
     * @return the status derived from the response code or from the I/O failure
     * @throws IOException if {@code url} itself is malformed or the connection object
     *                     cannot be created
     */
    public StatusCode validateURL(String url) throws IOException {
        return validate(url, MAX_REDIRECT);
    }

    /**
     * Issues one request to {@code url} and maps the result to a status.
     *
     * @param url         absolute URL to probe
     * @param maxRedirect number of redirects that may still be followed from here
     */
    private StatusCode validate(String url, int maxRedirect) throws IOException {
        final URL urlValue = new URL(url);
        final String host = urlValue.getHost();

        if (url.contains(WEBSITENOTCHECK)) {
            blockedHosts.put(host, StatusCode.SUCCESS);
        }
        // NOTE(review): the cache lookup below is disabled, so every URL is still requested
        // even when its host has already been recorded.
        //first check if the host is already marked as invalid
//        if (blockedHosts.containsKey(urlValue.getHost())) {
//            return blockedHosts.get(urlValue.getHost());
//        }
        final HttpURLConnection con = (HttpURLConnection) urlValue.openConnection();
        StatusCode statusCode;

        try {
            con.setInstanceFollowRedirects(false);
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);
            con.connect();
            final int resCode = con.getResponseCode();

            LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);
            if (resCode == HttpURLConnection.HTTP_OK) {
                statusCode = StatusCode.SUCCESS;
            } else if (resCode == HttpURLConnection.HTTP_SEE_OTHER || resCode == HttpURLConnection.HTTP_MOVED_PERM
                    || resCode == HttpURLConnection.HTTP_MOVED_TEMP) {
                final String location = con.getHeaderField("Location");
                if (location == null || location.isEmpty()) {
                    // A redirect without a target cannot be followed; previously this
                    // dereferenced null and escaped as a NullPointerException.
                    statusCode = markHost(host, StatusCode.FAIL);
                } else {
                    // Resolve against the current URL so that the port is kept and
                    // relative ("x.html") or scheme-relative ("//host/x") targets work.
                    final String target = new URL(urlValue, location).toString();
                    statusCode = validateRedirect(target, maxRedirect - 1, con);
                }
            } else {
                statusCode = markHost(host, StatusCode.FAIL);
            }
        } catch (UnknownHostException e) {
            statusCode = markHost(host, StatusCode.UNKOWNHOST);
        } catch (ConnectException e) {
            statusCode = markHost(host, StatusCode.CONNECTION_ISSUE);
        } catch (IOException e) {
            //if an IOException is caught possible reason is SOCKETTIMEOUT
            statusCode = markHost(host, StatusCode.SOCKETTIMEOUT);
        } finally {
            // Always release the connection, including when a runtime exception escapes.
            con.disconnect();
        }
        LOG.info("thread name {} connection id {} url {} ", Thread.currentThread().getName(), con.toString(), url);

        return statusCode;
    }

    /**
     * Follows a redirect if the remaining budget allows it.
     *
     * @param location      absolute redirect target
     * @param redirectCount remaining redirect budget; negative means exhausted
     * @param connection    connection that produced the redirect, released before following
     * @return the status of the redirect target, or {@code StatusCode.FAIL} when the budget
     *         is exhausted
     */
    private StatusCode validateRedirect(String location, int redirectCount, HttpURLConnection connection)
            throws IOException {
        if (redirectCount >= 0) {
            connection.disconnect();
            return validate(location, redirectCount);
        }
        return StatusCode.FAIL;

    }

    /** Records {@code status} for {@code host} and returns it, for use in assignments. */
    private StatusCode markHost(String host, StatusCode status) {
        blockedHosts.put(host, status);
        return status;
    }

}