Java: how can I read and write faster?

Rule 1: always use a much bigger buffer. 1024 bytes is pitifully small; try 32-64 KB.

You need to start the pipe's reading thread before you perform any writes to the pipe. In fact, I'm surprised you aren't getting 'read end dead' errors. Does this code actually work?

Better still, get rid of the piped streams entirely. Use a single thread and do all the processing as you go.

Get rid of the ready() test. It is an extra system call; just read until end of stream.

And use a BufferedWriter instead of a BufferedOutputStream, stop converting all those strings to bytes, and use BufferedWriter.newLine() instead of the line.separator system property.
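Put together, a minimal sketch of the single-thread approach described above: one thread, 64 KB buffers on both sides, no piped streams, no ready() test, and BufferedWriter.newLine(). Here line.toLowerCase() is a stand-in for the real per-line processing done elsewhere in the question's code.

import java.io.*;
import java.nio.charset.Charset;
import java.util.zip.ZipInputStream;

class SingleThreadConversion {
    // Read one zip entry line by line and write the transformed text straight to a file.
    static void convert(ZipInputStream zis, File out, Charset cs) throws IOException {
        // Deliberately not in try-with-resources: closing the reader would also close
        // the ZipInputStream, and with it any remaining zip entries.
        BufferedReader reader = new BufferedReader(new InputStreamReader(zis, cs), 64 * 1024);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out), cs), 64 * 1024)) {
            String line;
            while ((line = reader.readLine()) != null) { // read until end of stream
                writer.write(line.toLowerCase());        // stand-in for the real processing
                writer.newLine();                        // instead of the line.separator property
            }
        } // no flush() inside the loop; close() flushes once at the end
    }
}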
Try some judicious System.out.println statements to identify the slow areas of the code. You may also be able to introduce a break; in one of the loops so you don't keep processing data once a condition is met. If that doesn't narrow it down, try a Java profiler. Have you tried profiling it? You need to find out where the bottleneck is.

Yes, I'm profiling it with VisualVM. With print statements I also found that the actual data conversion is very fast; it's reading the input stream and processing it that is slow. What kind of runtime should I expect for a program that reads a 3.5 GB file, processes some text, and writes it to a new file? Just increasing the buffer to the suggested 32 KB made the program five times faster! I had forgotten I had it set that small for testing with small files. I'll have to look into using BufferedWriter; I had been writing to streams because AWS putObject takes an InputStream as a parameter. If I increase my buffer to 64 KB, will that cause problems when processing larger 4 GB files? Will I run into heap problems with -Xms3000m?

How? 64 KB is 64 KB. It has nothing to do with the size of the file. But don't expect 64 KB to be twice as fast as 32 KB: the benefit is asymptotic, and most of the 5x came from going from 1 KB to 8 KB.

With a 64 KB buffer and a 2800 MB initial heap in the run configuration, I've managed to get the runtime down to 2.5 hours for a 3.4 GB text file.

You could also try a BufferedInputStream under the ZipInputStream, though there may already be one there, in which case it will make no difference. One final piece of advice: don't flush inside the write loop. And I don't get the nested readLine() loops: the outer loop only executes once.
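For that last suggestion, the wrapping in downloadBucket() would look something like this (a sketch; the S3 SDK may already buffer the object content internally, in which case it changes nothing):

// Buffer the raw S3 object stream so the inflater pulls 64 KB chunks
// from the network instead of making many small reads.
ZipInputStream zis = new ZipInputStream(
        new BufferedInputStream(s3Object.getObjectContent(), 64 * 1024));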
public class DataMiner {
private static BigData app = new BigData();
private static DomainOfConstants doc = new DomainOfConstants();
private static Logger log = Logger.getLogger(DataMiner.class);
private static DBManager conn = new DBManager();
private static java.sql.Connection con = null;
private static AmazonS3 s3Client;
private static Iterator<String> itr;
private static List<String> entries = new ArrayList<String>();
private static S3Object s3Object;
private static ObjectMetadata meta;
public static InputStream dataStream;
public static byte[] buffer = new byte[1024]; // the "pitifully small" buffer the answer calls out; 32-64 KB is recommended
public static File file = new File(app.getCurrentPacsId()+".txt");
private static void obtainConnection(){
conn.connection();
entries = conn.grabDataSet();
conn.closeDb();
downloadBucket();
}
/*
*
* The Java heap size limits for Windows are:
* maximum possible heap size on 32-bit Java: 1.8 GB
* recommended heap size limit on 32-bit Java: 1.5 GB (or 1.8 GB with /3GB option)
*
* */
/*-------------Download and un-zip backup file-------------*/
private static void downloadBucket(){
try {
app.setAwsCredentials(doc.getAccessKey(), doc.getSecretKey());
s3Client = AmazonS3ClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(app.getAwsCredentials())).withRegion(Regions.US_EAST_1).build();
System.out.println("Connected to S3");
itr = entries.iterator();
while(itr.hasNext()){
app.setBucketKey(itr.next());
String key = app.getBucketKey();
app.setCurrentPacsId(key);
s3Object = s3Client.getObject(new GetObjectRequest(doc.getDesiredBucket(), app.getBucketKey()));
try {
ZipInputStream zis = new ZipInputStream(s3Object.getObjectContent());
ZipEntry entry = zis.getNextEntry();
extractObjects(buffer, s3Client, zis, entry);
} catch (AmazonServiceException e) {
log.error(e);
} catch (SdkClientException e) {
log.error(e);
} catch (IOException e) {
log.error(e);
}
}
System.out.println("Processing complete");
} catch (IllegalArgumentException e) {
e.printStackTrace();
}
}
public static void extractObjects(byte[] buffer, AmazonS3 s3Client, ZipInputStream zis, ZipEntry entry) throws IOException {
PipedOutputStream outputStream = null;
PipedInputStream is = null;
try {
while (entry != null)
{
String fileName = entry.getName();
if ("lib".equals(fileName)) { // == compares references; equals() compares contents
fileName = entry.getName();
}
boolean containsBackup = fileName.contains(doc.getDesiredFile());
if (containsBackup) {
System.out.println("A back up file was found");
long start = System.currentTimeMillis();
formatSchemaName();
System.out.println("Extracting :" + app.getCurrentPacsId());
log.info("Extracting " + app.getCurrentPacsId() + ",
compressed: " + entry.getCompressedSize() + " bytes,
extracted: " +
entry.getSize() + " bytes");
//ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
outputStream = new PipedOutputStream();
is = new PipedInputStream(outputStream);
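// NOTE (per the answer above): no thread is reading from the pipe yet, so this
// write loop blocks as soon as the pipe's internal buffer fills. The piped
// streams should be removed entirely in favour of a single thread.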
int len;
while ((len = zis.read(buffer)) >= 0)
{
outputStream.write(buffer, 0, len);
}
//InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
meta = new ObjectMetadata();
meta.setContentLength(file.length());
fileName = app.getCurrentPacsId();
runDataConversion(is,s3Client,fileName);
recordTime(start);
is.close();
outputStream.close();
System.out.println("Unzip complete");
}
else{
System.out.println("No back up found");
}
entry = zis.getNextEntry();
}
zis.closeEntry();
zis.close();
} catch (AmazonServiceException e) {
log.error(e);
} catch (SdkClientException e) {
log.error(e);
}
}
/*------------Formatting the replacement file name---------*/
private static void formatSchemaName(){
String s3Key = app.getCurrentPacsId();
String id = s3Key.replace(".zip", ".txt");
id = id.substring(id.indexOf("_"));
id = id.replaceFirst("_", "");
app.setCurrentPacsId(id);
}
/*---------------Process the data file----------------------*/
private static void runDataConversion(PipedInputStream is, AmazonS3 s3Client, String fileName) {
DataProcessor convert = new DataProcessor(s3Client);
convert.downloadBucket(is,fileName);
}
/*-------Records execution time of program in min/sec------*/
private static void recordTime(long start) throws IOException {
long end = System.currentTimeMillis();
long minutes = TimeUnit.MILLISECONDS.toMinutes(end - start);
long seconds = TimeUnit.MILLISECONDS.toSeconds(end - start);
System.out.println("Execution speed "+ minutes + ":" + (seconds % 60) +" min/sec\n");
}
}
public class DataProcessor {
private static AmazonS3 s3Client;
private static ObjectMetadata meta;
private static DomainOfConstants doc = new DomainOfConstants();
private static BigData app = new BigData();
public static File file = new File(app.getCurrentPacsId()+".txt");
private static Logger log = Logger.getLogger(DataProcessor.class);
//Construct connection
public DataProcessor (AmazonS3 s3Client){
this.s3Client = s3Client;
}
//
public void downloadBucket(PipedInputStream is, String fileName) {
try {
File dataStream = dataConversion(is);
s3Client.putObject(doc.getDestinationBucket(),FilenameUtils.getFullPath(doc.getDestinationKey()) + "Modified_"+ fileName, dataStream);
} catch (AmazonServiceException e) {
e.printStackTrace();
log.error(e);
} catch (SdkClientException e) {
e.printStackTrace();
log.error(e);
}
}
//Setup reading and writing streams
public static File dataConversion(PipedInputStream stream) {
BufferedReader reader = null;
BufferedOutputStream streamOut = null;
String line;
try {
reader = new BufferedReader(new InputStreamReader(stream,doc.getFileFormat()));
streamOut = new BufferedOutputStream(new FileOutputStream(file));
meta = new ObjectMetadata();
while(( line = reader.readLine() ) != null)
{
processLine(reader, streamOut, line);
}
}
catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (streamOut != null) streamOut.close();
if (reader != null) reader.close();
} catch (IOException e) {
e.printStackTrace();
log.error(e);
}
}
return file;
}
/*---------------------------------------Data processing------------------------------------------------*/
/*-----------Process and print lines---------*/
private static void processLine(BufferedReader reader, BufferedOutputStream streamOut, String line) {
try {
String newLine = System.getProperty("line.separator");
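// NOTE (per the answer above): ready() costs an extra system call per iteration;
// just read until readLine() returns null. This nested read loop also means the
// readLine() loop in dataConversion() only ever executes once.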
while (reader.ready()) {
if (line.contains(doc.getInsert())) {
handleData(streamOut, line);
} else if (line.contains(doc.getUse())) {
handleSchemaName(streamOut, line);
} else {
streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat())));
streamOut.write(newLine.getBytes());
}
line = reader.readLine();
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
log.error(e);
} catch (IOException e) {
e.printStackTrace();
log.error(e);
}
}
/*-----------Replace-Schema-Name-----------*/
private static void handleSchemaName(BufferedOutputStream streamOut, String line) throws IOException {
line = line.replace(line, "USE " + "`" + doc.getSchemaName() + app.getCurrentPacsId() + "`;");
streamOut.write(line.getBytes(Charset.forName(doc.getFileFormat())));
}
/*--------Avoid-Formating-Data-Portion-of-file--------*/
private static void handleData(BufferedOutputStream streamOut, String line) throws IOException {
StringTokenizer tk = new StringTokenizer(line);
while (tk.hasMoreTokens()) {
String data = tk.nextToken();
if (data.equals(doc.getValue())) {
streamOut.write(data.toLowerCase().getBytes(Charset.forName(doc.getFileFormat())));
data = tk.nextToken();
while (tk.hasMoreTokens()) {
streamOut.write(data.getBytes(Charset.forName(doc.getFileFormat())));
data = tk.nextToken();
}
}
streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat())));
streamOut.write(" ".getBytes(Charset.forName(doc.getFileFormat())));
}
}
}