Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/381.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 我怎样才能更快地阅读和写作? 规则1总是使用更大的缓冲区。1024小得可怜。试试32-64K。 在对管道执行任何写入操作之前,需要启动管道读取线程。事实上,我很惊讶你没有出现“读取结束”错误。这段代码真的有效吗? 事实上,摆脱管道流。使用一个线程并在运行过程中执行所有处理_Java_Performance_Stream_Text Processing_Bigdata - Fatal编程技术网

Java 我怎样才能更快地阅读和写作? 规则1总是使用更大的缓冲区。1024小得可怜。试试32-64K。 在对管道执行任何写入操作之前,需要启动管道读取线程。事实上,我很惊讶你没有出现“读取结束”错误。这段代码真的有效吗? 事实上,摆脱管道流。使用一个线程并在运行过程中执行所有处理。

Java 我怎样才能更快地阅读和写作? 规则1总是使用更大的缓冲区。1024小得可怜。试试32-64K。 在对管道执行任何写入操作之前,需要启动管道读取线程。事实上,我很惊讶你没有出现“读取结束”错误。这段代码真的有效吗? 事实上,摆脱管道流。使用一个线程并在运行过,java,performance,stream,text-processing,bigdata,Java,Performance,Stream,Text Processing,Bigdata,我怎样才能更快地阅读和写作? 规则1总是使用更大的缓冲区。1024小得可怜。试试32-64K。 在对管道执行任何写入操作之前,需要启动管道读取线程。事实上,我很惊讶你没有出现“读取结束”错误。这段代码真的有效吗? 事实上,摆脱管道流。使用一个线程并在运行过程中执行所有处理。 摆脱现成的测试。这是一个额外的系统调用。一直读到流的末尾。 使用BufferedWriter而不是BufferedOutputStream,停止将所有这些字符串转换为字节,并使用BufferedWriter.newLine而

我怎样才能更快地阅读和写作? 规则1总是使用更大的缓冲区。1024小得可怜。试试32-64K。 在对管道执行任何写入操作之前,需要启动管道读取线程。事实上,我很惊讶你没有出现“读取结束”错误。这段代码真的有效吗? 事实上,摆脱管道流。使用一个线程并在运行过程中执行所有处理。 摆脱现成的测试。这是一个额外的系统调用。一直读到流的末尾。 使用BufferedWriter而不是BufferedOutputStream,停止将所有这些字符串转换为字节,并使用BufferedWriter.newLine而不是system属性。
尝试一些明智的System.out.println语句来识别代码中的慢区域。你也许可以引入休息;在其中一个循环中,如果满足条件,则不必继续处理数据。如果这还不能缩小范围,那么试着使用java分析器。你试过评测它吗?你需要找出瓶颈在哪里。是的,我正在使用VisualVM来分析它。我还发现,对于print语句,代码在实际数据转换、读取输入流和处理它时非常快。读一个3.5gb的文件,处理一些文本并将其写入一个新的文件,我应该期望什么样的程序运行时间?我所做的只是将缓冲区增加到建议的32k,程序的速度只增加了5倍!我忘了我有这么小的设置来测试小文件。我将不得不研究使用BufferedWriter,我以前一直在向streams写入数据。这是因为AWS PUTobject需要一个输入流作为参数。如果将我的缓冲区增加到64k,这会导致处理4gb的较大文件时出现问题吗?我在xms3000m会有堆问题吗?如何?64K是64K。这与文件的大小无关。但不要期望64K的速度是32K的两倍。好处是渐进的。大多数5x来自于从1K到8K。上面提到的在运行时配置中使用64k缓冲区和2800mb初始堆。我已经设法将程序运行时间缩短到2.5小时,以处理一个3.4gb的文本文件。你也可以在ZipInputStream下尝试BufferedInputStream,但可能已经有了一个,所以可能不会有什么不同。最后一条建议:不要在写循环中刷新。我没有得到嵌套的readLine循环:外部循环只执行一次。
public class DataMiner  {

private static BigData app = new BigData();
private static DomainOfConstants doc = new DomainOfConstants();
private static Logger log = Logger.getLogger(DataMiner.class);
private static DBManager conn = new DBManager();
private static java.sql.Connection con = null;
private static AmazonS3  s3Client;
private static Iterator<String> itr;
private static List<String> entries = new ArrayList<String>();
private static S3Object s3Object;
private static ObjectMetadata meta;
public static InputStream dataStream;
public static byte[] buffer = new byte[1024];
public static File file = new File(app.getCurrentPacsId()+".txt");



private static void obtainConnection(){
    conn.connection();
    entries = conn.grabDataSet();       
    conn.closeDb();
    downloadBucket();
}

/*
 * 
 * The Java heap size limits for Windows are:
 * maximum possible heap size on 32-bit Java: 1.8 GB
 * recommended heap size limit on 32-bit Java: 1.5 GB (or 1.8 GB with /3GB option)
 * 
 * */
/*-------------Download and un-zip backup file-------------*/
private static void downloadBucket(){

    try {
        app.setAwsCredentials(doc.getAccessKey(), doc.getSecretKey());
        s3Client = AmazonS3ClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(app.getAwsCredentials())).withRegion(Regions.US_EAST_1).build();
        System.out.println("Connected to S3");
        itr = entries.iterator();
        while(itr.hasNext()){
            app.setBucketKey(itr.next());
            String key = app.getBucketKey();
            app.setCurrentPacsId(key);
            s3Object = s3Client.getObject(new GetObjectRequest(doc.getDesiredBucket(), app.getBucketKey()));
            try {
                ZipInputStream zis = new ZipInputStream(s3Object.getObjectContent());
                ZipEntry entry = zis.getNextEntry();
                extractObjects(buffer, s3Client, zis, entry);                   
            } catch (AmazonServiceException e) {
                log.error(e);
            } catch (SdkClientException e) {
                log.error(e);
            } catch (IOException e) {
                log.error(e);
            }
        }
        System.out.println("Processing complete");


    } catch (IllegalArgumentException e) {
        e.printStackTrace();
    } 
}

public static void extractObjects(byte[] buffer, AmazonS3 s3Client, ZipInputStream zis, ZipEntry entry) throws IOException {
    PipedOutputStream outputStream = null;
    PipedInputStream is = null;
    try {
        while (entry != null) 
        {
            String fileName = entry.getName();
            if (fileName == "lib") {
                fileName = entry.getName();
            }
            boolean containsBackup = fileName.contains(doc.getDesiredFile());

            if (containsBackup == true) {
                System.out.println("A back up file was found");
                long start = System.currentTimeMillis();
                formatSchemaName();
                System.out.println("Extracting :" + app.getCurrentPacsId());
                log.info("Extracting " + app.getCurrentPacsId() + ", 
                compressed: " + entry.getCompressedSize() + " bytes, 
                extracted: " + 
                entry.getSize() + " bytes");
         //ByteArrayOutputStream outputStream = new ByteArrayOutputStream();


                outputStream = new PipedOutputStream();
                is = new PipedInputStream(outputStream);

                int len;
                while ((len = zis.read(buffer)) >= 0) 
                {
                    outputStream.write(buffer, 0, len);
                }
   //InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
                meta = new ObjectMetadata();
                meta.setContentLength(file.length());
                fileName = app.getCurrentPacsId();
                runDataConversion(is,s3Client,fileName);
                recordTime(start);
                is.close();
                outputStream.close();
                System.out.println("Unzip complete");               
            }
            else{
                System.out.println("No back up found");
            }
            entry = zis.getNextEntry();
        }
        zis.closeEntry();
        zis.close();
    } catch (AmazonServiceException e) {
        log.error(e);
    } catch (SdkClientException e) {
        log.error(e);
    }
}


/*------------Formating the replacment file name---------*/
private static void formatSchemaName(){
    String s3Key = app.getCurrentPacsId();
    String id = s3Key.replace(".zip", ".txt");
    id = id.substring(id.indexOf("_"));
    id = id.replaceFirst("_", "");
    app.setCurrentPacsId(id);
}

/*---------------Process the data file----------------------*/
private static void runDataConversion(PipedInputStream is, AmazonS3 s3Client, String fileName) {
    DataProcessor convert = new DataProcessor(s3Client);
    convert.downloadBucket(is,fileName);
}

/*-------Records execution time of program in min/sec------*/
private static void recordTime(long start) throws IOException {
    long end = System.currentTimeMillis();
    long minutes = TimeUnit.MILLISECONDS.toMinutes(end - start);
    long seconds = TimeUnit.MILLISECONDS.toSeconds(end - start);
    System.out.println("Execution speed "+ minutes + ":" + (seconds % 60) +" min/sec\n");
}
public class DataProcessor {

private static AmazonS3 s3Client;
private static ObjectMetadata meta;
private static DomainOfConstants doc = new DomainOfConstants();
private static BigData app = new BigData();
public static File file = new File(app.getCurrentPacsId()+".txt");
private static Logger log = Logger.getLogger(DataProcessor.class);

//Construct connection
public DataProcessor (AmazonS3 s3Client){
    this.s3Client = s3Client;
}

//
public void downloadBucket(PipedInputStream is, String fileName) {
    try {
        File dataStream = dataConversion(is);
        s3Client.putObject(doc.getDestinationBucket(),FilenameUtils.getFullPath(doc.getDestinationKey()) + "Modified_"+ fileName, dataStream);
    } catch (AmazonServiceException e) {
        e.printStackTrace();
        log.error(e);
    } catch (SdkClientException e) {
        e.printStackTrace();
        log.error(e);

    }               
}

//Setup reading and writing streams
public static File dataConversion(PipedInputStream stream) {
    BufferedReader reader = null;
    BufferedOutputStream streamOut = null;
    String line;

    try {
        reader = new BufferedReader(new InputStreamReader(stream,doc.getFileFormat()));
        streamOut = new BufferedOutputStream(new FileOutputStream(file));
        meta = new ObjectMetadata();
        while(( line = reader.readLine() ) != null)
        {
            processLine(reader, streamOut, line);
        }
    }
    catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            streamOut.close();
            reader.close();

        } catch (IOException e) {
            e.printStackTrace();
            log.error(e);
        }
    }
    return file;
}


/*---------------------------------------Data processing------------------------------------------------*/

    /*-----------Process and print lines---------*/
private static void processLine(BufferedReader reader, BufferedOutputStream streamOut, String line) {
    try {
        String newLine = System.getProperty("line.separator");

        while (reader.ready()) {
            if (line.contains(doc.getInsert())) {
                handleData(streamOut, line);
            } else if (line.contains(doc.getUse())) {
                handleSchemaName(streamOut, line);
            } else {
                streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat()).toString()));
                streamOut.write(newLine.getBytes());
            }
            line = reader.readLine();
        }
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        log.error(e);

    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);

    }
}

    /*-----------Replace-Schema-Name-----------*/
private static void handleSchemaName(BufferedOutputStream streamOut, String line) throws IOException {
    line = line.replace(line, "USE " + "`" + doc.getSchemaName() + app.getCurrentPacsId() + "`;");
    streamOut.write(line.getBytes(Charset.forName(doc.getFileFormat())));
}


    /*--------Avoid-Formating-Data-Portion-of-file--------*/
private static void handleData(BufferedOutputStream streamOut, String line) throws IOException {
    StringTokenizer tk = new StringTokenizer(line);
    while (tk.hasMoreTokens()) {
        String data = tk.nextToken();
        if (data.equals(doc.getValue())) {
            streamOut.write(data.toLowerCase().getBytes(Charset.forName(doc.getFileFormat()).toString()));
            data = tk.nextToken();
            while (tk.hasMoreTokens()) {
                streamOut.write(data.getBytes(Charset.forName(doc.getFileFormat())));
                data = tk.nextToken();
            }
        }
        streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat().toString())));
        streamOut.write(" ".getBytes(Charset.forName(doc.getFileFormat())));
    }
}