Java 索引过程中的Lucene问题
我在用 Lucene 建索引时遇到问题。我的文档包括正文（TEXT）、若干字段和文档号（DOCNO），查询由主题的标题和描述构成，并且我有相关性评判文件（qrels）。问题是：当我计算 MAP（平均准确率均值，Mean Average Precision）时结果非常小（0.017），然而我朋友算出来的值是 0.13。我怀疑是我的 IndexFiles 类出了问题？你能帮我看看吗？^-^ 相关代码如下（IndexFiles 与 SearchFiles 两个类）：
/**
 * Builds the Lucene index ("index1") from the parsed document collection.
 *
 * <p>Each document is indexed with two fields: "DocNo" (the raw document
 * number, stored and not tokenized) and "TEXT" (body text concatenated with
 * the headline, tokenized and stored). BM25 is set as the similarity here so
 * that it matches the similarity used at search time — scores and therefore
 * MAP are only meaningful when both sides agree.
 */
public class IndexFiles {

    public IndexFiles() {}

    public static void main(String[] args) throws IOException, ParseException {
        // Parse the raw collection into in-memory document objects.
        ReadDocuments t = new ReadDocuments();
        List<DocumentsParser> docs = new ArrayList<>();
        t.readXml(docs, "documents");

        String indexPath = "index1";
        Directory dir = FSDirectory.open(new File(indexPath));

        // FIX: the stopword Reader and the IndexWriter were previously leaked
        // (the Reader was never closed; the writer was not closed on exception).
        // try-with-resources guarantees both are released. The two unused
        // constants FIELD_PATH / FIELD_CONTENTS were removed.
        try (Reader stopwords = new FileReader(new File("stopwords.txt"))) {
            // StandardAnalyzer consumes the reader as the stopword list.
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40, stopwords);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            iwc.setSimilarity(new BM25Similarity()); // must match the searcher's similarity

            try (IndexWriter indexWriter = new IndexWriter(dir, iwc)) {
                for (DocumentsParser doc : docs) {
                    Document document = new Document();
                    // DocNo is an identifier: StringField keeps it as a single token.
                    document.add(new StringField("DocNo", doc.getDOCNO(), Field.Store.YES));
                    // Index headline together with body so title words are searchable.
                    document.add(new TextField("TEXT", doc.getTEXT() + " " + doc.getHEAD(),
                            Field.Store.YES));
                    indexWriter.addDocument(document);
                }
            }
        }
    }
}
公共类索引文件{
公共索引文件(){}
公共静态void main(字符串[]args)引发IOException,ParseException{
ReadDocuments t=新的ReadDocuments();
列表文档=新建ArrayList();
t、 readXml(文档,“文档”);
最终字符串字段\u PATH=“PATH”;
最终字符串字段\u CONTENTS=“CONTENTS”;
字符串indexPath=“index1”;
Directory dir=FSDirectory.open(新文件(indexPath));
Reader r=新文件阅读器(新文件(“stopwords.txt”);
StandardAnalyzer=新的StandardAnalyzer(版本.LUCENE_40,r);
IndexWriterConfig iwc=新的IndexWriterConfig(Version.LUCENE_40,analyzer);
/*使用BM25相似性*/
相似性bm25similarity=新的bm25similarity();
iwc.集合相似性(BM25相似性);
IndexWriter IndexWriter=新的IndexWriter(dir,iwc);
对于(DocumentsParser doc:docs){
文档=新文档();
添加(新的StringField(“DocNo”,doc.getDOCNO(),Field.Store.YES));
添加(新文本字段(“TEXT”,doc.getTEXT()+“”+doc.getHEAD(),Field.Store.YES));
indexWriter.addDocument(文档);}
indexWriter.close();}
/类搜索文件/
公共类搜索文件{
公共静态void main(字符串[]args)引发异常{
SearchFiles ch=新的SearchFiles();
searchStemTfidfQLong();
}
SearchFiles(){}
public static void searchStemTfidfQLong()引发ParseException,IOException{
字符串索引=“index1”;
字符串字段=“文本”;
int hitsPerPage=1000;
IndexReader=DirectoryReader.open(FSDirectory.open(新文件(索引));
IndexSearcher search=新的IndexSearcher(阅读器);
/*使用BM25相似性*/
相似性bm25similarity=新的bm25similarity();
searcher.setSimilarity(bm25similarity);
Reader r=新文件阅读器(新文件(“stopwords.txt”);
StandardAnalyzer=新的StandardAnalyzer(版本.LUCENE_40,r);
QueryParser parser=新的QueryParser(Version.LUCENE_40,field,analyzer);
int i=0;
File File=新文件(“fichier.txt”);
FileWriter writere=新的FileWriter(file.getAbsoluteFile(),true);
对于(主题:Parser.getquerytopics(Parser.filename)){
/*查询chort*/
字符串queryChort=topic.getTitle();
queryChort=queryChort.replaceAll(“([\\(\\):/\\\\”,\\s\“]),”).trim();
i++;
//writere.write(queryshort+“\n”);
Query=parser.parse(queryChort);
System.out.println(“查询号:”+(i));
搜索者。搜索(查询,1000);
doSearch(i、搜索者、查询、命中率页面);
}
reader.close();
writere.close();
}
公共静态void doSearch(int-idReq、indexsearch-search、Query-Query、int-hitsPerPage)引发IOException{
TopDocs results=searcher.search(查询、空、命中率页面);
System.out.println(查询);
ScoreDoc[]点击次数=结果。scoreDocs;
int numTotalHits=results.totalHits;
System.out.println(numTotalHits+“总匹配文档”);
int start=1;
int end=Math.min(numTotalHits,hitsPerPage);
File File=新文件(“File.txt”);
FileWriter writer=新的FileWriter(file.getAbsoluteFile(),true);
文件file1=新文件(“fichier.txt”);
FileWriter writere=新的FileWriter(file1.getAbsoluteFile(),true);
for(int i=start;i
/**
 * Runs each topic title as a query against "index1" with BM25 and writes a
 * TREC-format run file ("file.txt") for evaluation with trec_eval.
 *
 * <p>Two bugs in the original version directly depressed MAP:
 * <ol>
 *   <li>The result loop started at index 1, silently dropping the
 *       top-scored hit of every query.</li>
 *   <li>The run line put the score in the <em>rank</em> column and the loop
 *       index in the <em>sim</em> column. trec_eval re-sorts results by sim,
 *       so documents were effectively ranked by list position ascending —
 *       i.e. close to reversed order.</li>
 * </ol>
 */
public class SearchFiles {

    public static void main(String[] args) throws Exception {
        searchStemTfidfQLong();
    }

    SearchFiles() {}

    /**
     * Parses every topic, cleans its title into a bag-of-words query and
     * delegates result writing to {@link #doSearch}.
     *
     * @throws ParseException if a cleaned title still fails query parsing
     * @throws IOException    on index or file access errors
     */
    public static void searchStemTfidfQLong() throws ParseException, IOException {
        String index = "index1";
        String field = "TEXT";
        int hitsPerPage = 1000;

        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Must match the similarity used at indexing time.
            searcher.setSimilarity(new BM25Similarity());

            // FIX: stopword Reader was leaked; close it deterministically.
            try (Reader stopwords = new FileReader(new File("stopwords.txt"))) {
                StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40, stopwords);
                QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);

                int queryNumber = 0;
                for (Topic topic : Parser.getQuerysTopics(Parser.filename)) {
                    // Strip characters the query parser treats as syntax
                    // (parentheses, colons, slashes, quotes, ...).
                    String queryShort = topic.getTitle()
                            .replaceAll("([<>\\(\\):/\\\\',\\s\"])", " ")
                            .trim();
                    queryNumber++;
                    Query query = parser.parse(queryShort);
                    System.out.println("Query number : " + queryNumber);
                    // FIX: removed a redundant searcher.search(query, 1000)
                    // whose result was discarded — doSearch performs the
                    // (single) search itself.
                    doSearch(queryNumber, searcher, query, hitsPerPage);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Executes {@code query} and appends one TREC run line per hit to
     * "file.txt" ({@code qid iter docno rank sim run_id}), plus the bare
     * doc numbers to "fichier.txt".
     *
     * @param idReq       1-based topic/query number
     * @param searcher    searcher already configured with BM25
     * @param query       parsed query
     * @param hitsPerPage maximum number of hits to write
     * @throws IOException on search or file-write failure
     */
    public static void doSearch(int idReq, IndexSearcher searcher, Query query, int hitsPerPage)
            throws IOException {
        TopDocs results = searcher.search(query, null, hitsPerPage);
        System.out.println(query);
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int end = Math.min(numTotalHits, hitsPerPage);

        // FIX: writers are now closed even if an exception is thrown mid-loop
        // (previously they leaked and buffered output could be lost).
        try (FileWriter writer = new FileWriter(new File("file.txt").getAbsoluteFile(), true);
             FileWriter writere = new FileWriter(new File("fichier.txt").getAbsoluteFile(), true)) {

            // FIX: start at 0 — the original started at 1 and skipped the
            // top-ranked document of every query.
            for (int i = 0; i < end; i++) {
                Document doc = searcher.doc(hits[i].doc);
                String docNo = doc.get("DocNo");
                if (docNo != null) {
                    // FIX: only log real doc numbers (previously "null" was
                    // written to fichier.txt before the null check).
                    writere.write(docNo + "\n");
                    // TREC run format: qid iter docno rank sim run_id.
                    // FIX: rank (i+1) now precedes the score, and the score is
                    // formatted with Locale.ROOT so the decimal separator is
                    // always '.', which trec_eval requires.
                    writer.write(idReq + " 0 " + docNo + " " + (i + 1) + " "
                            + String.format(java.util.Locale.ROOT, "%.6f", hits[i].score)
                            + " ScoreID\n");
                } else {
                    System.out.println((i + 1) + ". No DocNo for this document");
                }
            }
        }
    }
}