在 Spring Boot 应用中使用 Apache Lucene 搜索带有空格的确切短语时遇到问题
我是 Apache Lucene 新手。我使用 Spring Boot 创建了一个基于 Apache Lucene 的应用程序,在搜索短语查询(phrase query)时遇到了问题:当搜索词包含空格时无法精确匹配。下面是我的搜索服务代码,希望有人能帮忙看看。(标签:Java, Spring, Apache, Lucene)
package com.cygnet.lucene.component;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
@Component
public class LuceneSearch {

	// Folder path where Lucene stores the index files built from #fileInputDirPath
	@Value("${lucene.indexDirPath}")
	String indexDirPath;

	/**
	 * Searches the "contents" field of the Lucene index for the given term as an
	 * exact phrase and returns a map of matching file names to their relative
	 * paths (with forward slashes), plus a "totalCount" entry holding the total
	 * number of hits. Highlighted fragments are printed to stdout.
	 *
	 * @param searchTerm text to search for; treated as a single exact phrase, so
	 *                   input containing spaces matches as a unit
	 * @return map of fileName -> relative path, plus the "totalCount" entry
	 * @throws Exception if the index cannot be opened, the query cannot be
	 *                   parsed, or highlighting fails
	 */
	public Map<String, String> searchContentFromFiles(String searchTerm) throws Exception {
		Map<String, String> fileWithRelativePathMap = new HashMap<>();
		// try-with-resources: the original leaked the IndexReader (never closed)
		// and skipped dir.close() entirely whenever parse/search threw.
		try (Directory dir = FSDirectory.open(Paths.get(indexDirPath));
				IndexReader reader = DirectoryReader.open(dir)) {
			System.out.println("MaxDoc:::" + reader.maxDoc());
			// Searcher over a single point-in-time view of the index.
			IndexSearcher searcher = new IndexSearcher(reader);
			// Analyzer with the default stop-word set; must match the analyzer
			// that was used at index time for results to line up.
			Analyzer analyzer = new StandardAnalyzer();
			QueryParser qp = new QueryParser("contents", analyzer);
			System.out.println("searchTerm::" + searchTerm);
			// Escape query-syntax metacharacters and quote the whole term so that
			// input containing spaces parses as a PhraseQuery (exact-phrase match)
			// instead of being split into independent OR'd term queries — this is
			// why the original failed to find exact words with spaces.
			Query query = qp.parse("\"" + QueryParser.escape(searchTerm) + "\"");
			TopDocs hits = searcher.search(query, 10000);
			System.out.println("hits::" + hits.totalHits);
			fileWithRelativePathMap.put("totalCount", String.valueOf(hits.totalHits));
			// Highlighting: wrap matched terms in HTML <B></B> tags.
			Formatter formatter = new SimpleHTMLFormatter();
			// Scores text fragments by the number of unique query terms found.
			QueryScorer scorer = new QueryScorer(query);
			// Marks up highlighted terms found in the best sections of a text.
			Highlighter highlighter = new Highlighter(formatter, scorer);
			// Breaks text into same-size fragments without splitting up spans.
			Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10000);
			highlighter.setTextFragmenter(fragmenter);
			// Iterate over found results.
			for (int i = 0; i < hits.scoreDocs.length; i++) {
				int docid = hits.scoreDocs[i].doc;
				Document doc = searcher.doc(docid);
				String rPath = doc.get("path");
				String fileName = doc.get("fileName");
				System.out.println("Path " + " : " + rPath);
				// Stored text of the matched document.
				String text = doc.get("contents");
				TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
				String[] frags = highlighter.getBestFragments(stream, text, 10);
				// Guard against documents indexed without a stored "path" or
				// "fileName" field — the original would NPE on rPath.replace.
				if (fileName != null && rPath != null) {
					fileWithRelativePathMap.put(fileName, rPath.replace("\\", "/"));
				}
				for (String frag : frags) {
					System.out.println(frag);
				}
			}
		}
		return fileWithRelativePathMap;
	}
}
package com.cygnet.lucene.component;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

@Component
public class LuceneSearch {
	// Lucene 将从 #fileInputDirPath 创建索引文件的文件夹路径
	@Value("${lucene.indexDirPath}")
	String indexDirPath;

	public Map<String, String> searchContentFromFiles(String searchTerm) throws Exception {
		Map<String, String> fileWithRelativePathMap = new HashMap<>();
		// 获取目录引用
		Directory dir = FSDirectory.open(Paths.get(indexDirPath));
		// 索引阅读器 - 用于访问 Lucene 索引的某一时间点视图的接口
		IndexReader reader = DirectoryReader.open(dir);
		System.out.println("MaxDoc:::" + reader.maxDoc());
		// 创建 Lucene searcher,它在单个 IndexReader 上进行搜索
		IndexSearcher searcher = new IndexSearcher(reader);
		// 具有默认停止词的分析器
		Analyzer analyzer = new StandardAnalyzer();
		// 用于创建 TermQuery 的查询解析器
		QueryParser qp = new QueryParser("contents", analyzer);
		System.out.println("searchTerm::" + searchTerm);
		// 创建查询
		Query query = qp.parse(searchTerm);
		// 搜索 Lucene 文档
		TopDocs hits = searcher.search(query, 10000);
		System.out.println("hits::" + hits.totalHits);
		fileWithRelativePathMap.put("totalCount", String.valueOf(hits.totalHits));
		// 使用 HTML <B></B> 标记突出显示搜索到的术语
		Formatter formatter = new SimpleHTMLFormatter();
		// 根据找到的唯一查询词的数量对文本片段进行评分
		QueryScorer scorer = new QueryScorer(query);
		// 用于标记文本最佳部分中突出显示的术语
		Highlighter highlighter = new Highlighter(formatter, scorer);
		// 将文本拆分为相同大小的片段,但不拆分跨距
		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10000);
		highlighter.setTextFragmenter(fragmenter);
		// 迭代找到的结果
		for (int i = 0; i < hits.scoreDocs.length; i++) {
			int docid = hits.scoreDocs[i].doc;
			Document doc = searcher.doc(docid);
			String rPath = doc.get("path");
			String fileName = doc.get("fileName");
			System.out.println("Path " + " : " + rPath);
			String text = doc.get("contents");
			TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
			String[] frags = highlighter.getBestFragments(stream, text, 10);
			fileWithRelativePathMap.put(fileName, rPath.replace("\\", "/"));
			for (String frag : frags) {
				System.out.println(frag);
			}
		}
		dir.close();
		return fileWithRelativePathMap;
	}
}
如果我在搜索词上加引号("")来搜索,可以搜索到古吉拉特语文本;但用同样的方式搜索区分大小写的英文文本时,却搜索不到。您为 searchTerm 提供了哪些值?能举个例子吗?