Java 外接程序ArrayList<;整数>;耗时太长(超过50000个节点)
我有个问题。我想创建一个基于红外系统的搜索引擎。所以,我有一些文件,我获取我需要的信息,并将它们存储在HashMaps、TreeMaps、ArrayLists e.t.c等结构中。然后,我想将这些信息写入文件中。因此,我同时打开两个文件写入程序。但是我加入了越来越多的字符串 但是这个过程太长了。我不知道为什么。当我将所有内容放入FileWriter时,我会通过Java 外接程序ArrayList<;整数>;耗时太长(超过50000个节点),java,Java,我有个问题。我想创建一个基于红外系统的搜索引擎。所以,我有一些文件,我获取我需要的信息,并将它们存储在HashMaps、TreeMaps、ArrayLists e.t.c等结构中。然后,我想将这些信息写入文件中。因此,我同时打开两个文件写入程序。但是我加入了越来越多的字符串 但是这个过程太长了。我不知道为什么。当我将所有内容放入FileWriter时,我会通过close()将其关闭 您认为问题在于每次在缓冲区中添加新字符串时的重新分配吗 我是否应该遵循另一种策略,打开缓冲区、写入、关闭缓冲区,然
close()
将其关闭
您认为问题在于每次在缓冲区中添加新字符串时的重新分配吗
我是否应该遵循另一种策略,打开缓冲区、写入、关闭缓冲区,然后下次再次打开以在前一个数据的末尾写入?这会花更少的时间吗
注:对于一个小的输入文件,代码完全按照我的要求工作。问题是当我使用大量的输入文件时
public static void writeWordsandDfInFile(Map<String, Word> tmpMap) throws IOException
{
Set tmpSet = tmpMap.entrySet();//Transform to Set for quick iteration and printing
Iterator tmpIt = tmpSet.iterator();
String le3h=null;
int bytesPostingFile;
int bytesVocabularyFile;
String str_out = null;
String prev_str_out = null;
String str_out2 = null;
String str_tmp;
String str_tmp2;
String Tstrt;
int prevctr=0;
int flag=0;
int i=0;
int j;
int k;
int flag2;
int flag3;
int docId;
//////////////////
int SIZEDocumentsFileBytes;
int prevInDocumentsFileBytes = 0;
int newInDocumentsFileBytes = 0;
int prwth_kataxwrhsh;
int ctrPostingFileBytes=0;
int prwthMonofora=0;
giveWrdTakeBytePos=new HashMap<String,Integer>();//8a t dinw thn le3h kai 8a mou epistrefei thn 8esh se bytes mesa sto VocabularyFile.txt
// Create file
FileWriter fstream = new FileWriter(vocabularyFile.getPath());
BufferedWriter out = new BufferedWriter(fstream);
out.
out.write("Le3h Df PosInPostingFile.txt\n\n");
str_tmp=("Le3h Df PosInPostingFile.txt\n\n");
// Create file
FileWriter fstream2 = new FileWriter(postingFile.getPath());
BufferedWriter out2 = new BufferedWriter(fstream2);
out2.write("DocId Tf LineInFile PosInDocumentsFile\n\n");
str_tmp2=("DocId Tf LineInFile PosInDocumentsFile\n\n");
PostingFileBytes=new ArrayList<Integer>();//krataw ta bytes gia kaue eggrafh sto PostingFile
flag=0;
i=0;
while(tmpIt.hasNext())
{
Map.Entry m = (Map.Entry) tmpIt.next();
le3h=(String)m.getKey();
Set s = tmpMap.get(le3h).getDocList().entrySet();
Iterator it = s.iterator();
Map.Entry mm =(Map.Entry)it.next();
docId=(Integer)mm.getKey();
Set ss=tmpMap.get(le3h).getDocList().keySet();
Set stf=tmpMap.get(le3h).getTf().keySet();
Iterator ssIt = ss.iterator();
flag2=0;
prwth_kataxwrhsh=0;
while(ssIt.hasNext())
{
docId=(Integer)ssIt.next();
out2.write(docId+" "+tmpMap.get(le3h).getTf(docId));//grafw sto VocabularyFile.txt thn ka8e le3h kai to Df ths
if(flag2==0)
{
str_out2=(docId+" "+tmpMap.get(le3h).getTf(docId));
flag2=1;
}
else
{
str_out2=(docId+" "+tmpMap.get(le3h).getTf(docId));
}
flag3=0;
Tstrt=null;
for(k=0;k<tmpMap.get(le3h).ByteList.get(docId).size();k++)
{
out2.write(" "+tmpMap.get(le3h).ByteList.get(docId).get(k));
if(flag3==0)
{
Tstrt=(" "+tmpMap.get(le3h).ByteList.get(docId).get(k));
flag3=1;
}
else
{
Tstrt=Tstrt+(" "+tmpMap.get(le3h).ByteList.get(docId).get(k));
}
}
str_out2=str_out2+Tstrt;
out2.write(" ->"+DocumentsFileBytes.get(docId)+"\n");
str_out2=str_out2+(" ->"+DocumentsFileBytes.get(docId)+"\n");
bytesPostingFile=str_out2.toString().length();
////////////////////////////////////////////////////////////////////////////////////////////////
//................................................................................................................................
SIZEDocumentsFileBytes=PostingFileBytes.size();
if(prwthMonofora==0)
{
prevInDocumentsFileBytes=str_tmp2.toString().length();
prwthMonofora=1;
PostingFileBytes.add(prevInDocumentsFileBytes);
ctrPostingFileBytes=0;//dld. parxei kataxwrish sthn 8esh 0 tou posting file
newInDocumentsFileBytes=prevInDocumentsFileBytes + bytesPostingFile;
//System.out.println("EPOMENH: "+newInDocumentsFileBytes);
}
else
{
if(prwth_kataxwrhsh==0)//gia ka8e le3h mono thn prwth fora kai as exei DF>1
{
//System.out.println("Prohg. Timh:"+prevInDocumentsFileBytes);
prevInDocumentsFileBytes=newInDocumentsFileBytes;//apo prin
//System.out.println("BAZW: "+prevInDocumentsFileBytes);
PostingFileBytes.add(prevInDocumentsFileBytes);
ctrPostingFileBytes++;
prwth_kataxwrhsh=1;
}
else
{
prevInDocumentsFileBytes=newInDocumentsFileBytes;
}
newInDocumentsFileBytes=prevInDocumentsFileBytes + bytesPostingFile;
//System.out.println("EPOMENH: "+newInDocumentsFileBytes);
}
}
//------------------------------------------------------------------------------------------------------------------
int ptr=ctrPostingFileBytes;
out.write(le3h+" "+tmpMap.get(le3h).getDf());//grafw sto VocabularyFile.txt thn ka8e le3h kai to Df ths
out.write(" ->"+PostingFileBytes.get(ptr)+"\n");
if(flag==0)//thn prwth fora
{
str_out=(le3h+" "+tmpMap.get(le3h).getDf()+" ->"+PostingFileBytes.get(ptr)+"\n");
giveWrdTakeBytePos.put(le3h, str_tmp.toString().length());
flag=1;
prev_str_out=str_tmp+str_out;
}
else
{
giveWrdTakeBytePos.put(le3h, prev_str_out.toString().length());
str_out=str_out+(le3h+" "+tmpMap.get(le3h).getDf()+" ->"+PostingFileBytes.get(ptr)+"\n");
prev_str_out=prev_str_out+(le3h+" "+tmpMap.get(le3h).getDf()+" ->"+PostingFileBytes.get(ptr)+"\n");
}
//................................................................................................................................
}
//Close the output stream
out.close();
//Close the output stream
out2.close();
}
publicstaticvoidwritewordsanddnfile(映射tmpMap)抛出IOException
{
Set tmpSet=tmpMap.entrySet();//转换为Set以进行快速迭代和打印
迭代器tmpIt=tmpSet.Iterator();
字符串le3h=null;
int字节发布文件;
int字节语音文件;
字符串str_out=null;
字符串prev_str_out=null;
字符串str_out2=null;
字符串strutmp;
字符串str_tmp2;
字符串Tstrt;
int prevctr=0;
int标志=0;
int i=0;
int j;
int k;
int flag2;
int flag3;
int-docId;
//////////////////
int-SIZEDocumentsFileBytes;
int prevInDocumentsFileBytes=0;
int newInDocumentsFileBytes=0;
内华达州(kataxwrhsh);
int-ctrPostingFileBytes=0;
int-prwthMonofora=0;
giveWrdTakeBytePos=new HashMap();//8a t dinw thn le3h kai 8a mou epistrefei thn 8esh se bytes mesa sto vocabulary file.txt
//创建文件
FileWriter fstream=新的FileWriter(vocabularyFile.getPath());
BufferedWriter out=新的BufferedWriter(fstream);
出来
out.write(“Le3h Df PosInPostingFile.txt\n\n”);
str_tmp=(“Le3h Df PosInPostingFile.txt\n\n”);
//创建文件
FileWriter fstream2=新的FileWriter(postingFile.getPath());
BufferedWriter out2=新的BufferedWriter(fstream2);
out2.写入(“DocId Tf lineinfle PosInDocumentsFile\n\n”);
str_tmp2=(“DocId Tf lineinfle PosInDocumentsFile\n\n”);
PostingFileBytes=new ArrayList();//krataw ta bytes gia kaue eggrafh sto PostingFile
flag=0;
i=0;
while(tmpIt.hasNext())
{
Map.Entry m=(Map.Entry)tmpIt.next();
le3h=(字符串)m.getKey();
Set s=tmpMap.get(le3h.getDocList().entrySet();
迭代器it=s.Iterator();
Map.Entry mm=(Map.Entry)it.next();
docId=(整数)mm.getKey();
设置ss=tmpMap.get(le3h.getDocList().keySet();
设置stf=tmpMap.get(le3h.getTf().keySet();
迭代器ssIt=ss.Iterator();
flag2=0;
prwth_kataxwrhsh=0;
while(ssIt.hasNext())
{
docId=(整数)ssIt.next();
out2.write(docId+“”+tmpMap.get(le3h.getTf(docId));//grafw sto VocabularyFile.txt thn ka8e le3h kai to Df th
如果(flag2==0)
{
str_out2=(docId+“”+tmpMap.get(le3h.getTf(docId));
flag2=1;
}
其他的
{
str_out2=(docId+“”+tmpMap.get(le3h.getTf(docId));
}
flag3=0;
Tstrt=null;
对于(k=0;k1
{
//System.out.println(“Prohg.Timh:+prevInDocumentsFileBytes”);
prevInDocumentsFileBytes=newInDocumentsFileBytes;//apo prin
//System.out.println(“BAZW:+prevInDocumentsFileBytes”);
PostingFileBytes.add(prevInDocumentsFileBytes);
ctrPostingFileBytes++;
prwth_kataxwrhsh=1;
}
其他的
{
prevInDocumentsFileBytes=新的indocumentsFileBytes;
}
newInDocumentsFileBytes=prevInDocumentsFileBytes+bytesPostingFile;
//System.out.println(“EPOMENH:+newInDocumentsFileBytes”);
}
}
//------------------------------------------------------------------------------------------------------------------
int ptr=ctrPostingFileBytes;
out.write(le3h+“”+tmpMap.get(le3h.getDf());//grafw sto VocabularyFile.txt thn ka8e le3h kai to Df th
out.write(“->”+PostingFileBytes.get(ptr)+“\n”);
如果(标志==0)//thn prwth fora
{
str_out=(le3h+“”+tmpMap.get(le3h).getDf()+“->”+PostingFileBytes.get(ptr)+“\n”);
给定wrdtakebytepos.put(le3h,str_tmp.toString().length());
flag=1;
prev_str_out=str_tmp+str_out;
}
其他的
{
给定wrdtakebytepos.put(le3h,prev_str_out.toString().length());
str_out=str_out+(le3h+“”+tmpMap.get(le3h).getDf()+“->”+PostingFileBytes.get(ptr)+“\n”);
prev_str_out=prev_str_out+(le3h+“”+tmpMap.get(le3h).getDf()+“->”+PostingFileBytes.get(ptr)+“\n”);
}
//................................................................................................................................
}
//关闭输出流
out.close();
//关闭输出流
out2.close();
}
从我所看到的情况来看,你永远不会将数据追加到文件中,而总是将其写入新文件。但是从你上面所写的内容(没有阅读全部代码)来看,你希望将数据追加到文件中
new FileWriter("path", true);
这对你有帮助吗
另一个建议是删除文件并使用以下内容:
public static void foo()
{
// ...
byte[] fifeMBByteAryOne = new byte[5242880];
ByteArrayStream bStream = new ByteArrayStream(fifeMBByteAryOne);
BufferedWriter out = new BufferedWriter(new OutputStreamWriter(bStream));
byte[] fifeMBByteAryTwo = new byte[5242880];
ByteArrayStream bStream2 = new ByteArrayStream(fifeMBByteAryTwo);
BufferedWriter out2 = new BufferedWriter(new OutputStreamWriter(bStream2));
// ...
}
private static class ByteArrayStream extends OutputStream {
int index = 0;
byte[] container;
public ByteArrayStream(byte[] container) {
this.container = container;
}
@Override
public void write(int b) throws IOException {
container[index++] = (byte)b;
}
}
然后让它再次运行,看看需要多长时间。如果它像以前一样慢,文件不是你的问题
在阅读了代码之后,我相当肯定您是java编程的学生或初学者,这是肯定的
Set s = currentWord.getDocList().entrySet();
Iterator it = s.iterator();
Map.Entry mm = (Map.Entry) it.next();
docId = (Integer) mm.getKey();
// at the beginning of a loop
long startedAt = new Date().getTime();
// somewhen within the loop:
System.out.println("in situation X " + (new Date().getTime()-startedAt);