Java Lucene搜索两个或多个不适用于Android的单词_Java_Android_Search_Lucene

Java Lucene搜索两个或多个不适用于Android的单词

java android search lucene

Java Lucene搜索两个或多个不适用于Android的单词,java,android,search,lucene,Java,Android,Search,Lucene,我正在Android上使用Lucene 3.6.2。使用的代码和观察结果如下索引代码： public void indexBookContent(Book book, File externalFilesDir) throws Exception { IndexWriter indexWriter = null; NIOFSDirectory directory = null; directory = new NIOFSDirectory(new File(exter

我正在Android上使用Lucene 3.6.2。使用的代码和观察结果如下

索引代码：

/**
 * Indexes the content of one book into the Lucene index stored under
 * {@code <externalFilesDir>/IndexFile/<bookId>}.
 *
 * <p>The writer and the directory are always closed, even when indexing fails,
 * so the index write lock is released and pending changes are committed.
 *
 * @param book             the book being indexed; its id names the index directory
 * @param externalFilesDir base directory that contains the {@code IndexFile} folder
 * @throws Exception if the index cannot be opened, written or closed
 */
public void indexBookContent(Book book, File externalFilesDir) throws Exception {
    NIOFSDirectory directory = null;
    IndexWriter indexWriter = null;
    try {
        directory = new NIOFSDirectory(new File(externalFilesDir.getPath() + "/IndexFile", book.getBookId()));
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_36, new StandardAnalyzer(LUCENE_36));
        indexWriter = new IndexWriter(directory, indexWriterConfig);

        Document document = createFieldsForContent();

        // NOTE(review): decryptedPage is not declared in this snippet; presumably a field
        // holding the decrypted HTML of the current page - confirm against the full class.
        String pageContent = Html.fromHtml(decryptedPage).toString();

        // getFieldable(name) returns the FIRST field added under that name, so repeated
        // setValue calls on "content" all hit the same field and only the last value
        // survives. A single call with the lower-cased text is therefore equivalent to
        // the previous three calls.
        ((Field) document.getFieldable("content")).setValue(pageContent.toLowerCase());

        // Without this the document is built but never written to the index.
        indexWriter.addDocument(document);
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
    }
}

/**
 * Creates a document template whose fields all start with an empty value.
 *
 * <p>Three stored fields are registered under the same name {@code "content"}
 * (not analyzed, analyzed, not analyzed, in that order), followed by one analyzed
 * {@code "recordId"} field. The insertion order is kept on purpose: a lookup by
 * name returns the first field that was added under that name.
 *
 * <p>NOTE(review): indexing the same text under one field name with different
 * analysis settings is questionable; separate field names are probably intended.
 *
 * @return the new document with its empty placeholder fields
 */
private Document createFieldsForContent() {
    final Document template = new Document();

    template.add(new Field("content", "", YES, NOT_ANALYZED));
    template.add(new Field("content", "", YES, ANALYZED));
    template.add(new Field("content", "", YES, NOT_ANALYZED));
    template.add(new Field("recordId", "", YES, ANALYZED));

    return template;
}

/**
 * Searches the index of one book for the given text.
 *
 * <p>Opens the index stored under {@code <externalFieldsDir>/IndexFile/<bookId>},
 * builds the query through {@link #constructSearchQuery(String, String)} and
 * collects the top hits (at most 100) into {@code scoreDocs}.
 *
 * <p>NOTE(review): the snippet ends after the hits are collected. There is no
 * {@code return} statement, {@code searchResults} is never filled, and neither the
 * reader, the searcher nor the directory is closed here - the conversion of the
 * hits into the returned {@code JSONArray} appears to have been left out.
 *
 * @param bookId            id of the book; names the index directory
 * @param searchText        text entered by the user
 * @param externalFieldsDir base directory that contains the {@code IndexFile} folder
 * @param filter            passed through to the query construction
 * @throws Exception if the index cannot be opened or searched
 */
public JSONArray searchBook(String bookId, String searchText, File externalFieldsDir, String filter) throws Exception {
    // Not used in the visible part of the method.
    List<SearchResultData> searchResults = null;
    NIOFSDirectory directory = null;
    IndexReader indexReader = null;
    IndexSearcher indexSearcher = null;

    // Same directory layout as the one used when the book was indexed.
    directory = new NIOFSDirectory(new File(externalFieldsDir.getPath() + "/IndexFile", bookId));
    indexReader = IndexReader.open(directory);
    indexSearcher = new IndexSearcher(indexReader);

    Query finalQuery = constructSearchQuery(searchText, filter);

    // Keep the 100 best-scoring hits.
    TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);
    indexSearcher.search(finalQuery, collector);
    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
}

/**
 * Builds a "contains" query on the {@code "content"} field for the given text.
 *
 * <p>The wildcard term is created directly instead of going through
 * {@code QueryParser}. The parser splits its input at whitespace, so
 * {@code *two terms*} became the two clauses {@code content:*two} and
 * {@code content:terms*}, which never match a not-analyzed field holding the
 * whole text. A single {@code WildcardQuery} term keeps the spaces.
 *
 * <p>No escaping is applied because the text is no longer parsed as query syntax.
 * Any {@code *} or {@code ?} inside {@code searchText} is still interpreted as a
 * wildcard. The text is not lower-cased here, matching the previous
 * {@code setLowercaseExpandedTerms(false)} setting.
 *
 * @param searchText text to look for; must not be {@code null}
 * @param filter     currently unused by this method
 * @return a wildcard query matching terms that contain {@code searchText}
 * @throws ParseException never thrown by this implementation; kept so that
 *                        existing callers continue to compile
 */
private Query constructSearchQuery(String searchText, String filter) throws ParseException {
    if (searchText == null) {
        // QueryParser.escape(null) failed the same way before.
        throw new NullPointerException("searchText");
    }

    String wildCardSearchText = "*" + searchText + "*";

    return new org.apache.lucene.search.WildcardQuery(
            new org.apache.lucene.index.Term("content", wildCardSearchText));
}

public void indexBookContent(Book book, File externalFilesDir) throws Exception {
    IndexWriter indexWriter = null;
    NIOFSDirectory directory = null;
    directory = new NIOFSDirectory(new File(externalFilesDir.getPath() + "/IndexFile", book.getBookId()));
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_36, new StandardAnalyzer(LUCENE_36));
    indexWriter = new IndexWriter(directory, indexWriterConfig);
    Document document = createFieldsForContent();
    String pageContent = Html.fromHtml(decryptedPage).toString();
    ((Field) document.getFieldable("content")).setValue(pageContent);
    ((Field) document.getFieldable("content")).setValue(pageContent);
    ((Field) document.getFieldable("content")).setValue(pageContent.toLowerCase());
}
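private Document createFieldsForContent() {
    Document document = new Document();
    Field contentFieldLower = new Field("content", "", YES, NOT_ANALYZED);
    document.add(contentFieldLower);
    Field contentField = new Field("content", "", YES, ANALYZED);
    document.add(contentField);
    Field contentFieldNotAnalysed = new Field("content", "", YES, NOT_ANALYZED);
    document.add(contentFieldNotAnalysed);
    Field recordIdField = new Field("recordId", "", YES, ANALYZED);
    document.add(recordIdField);
    return document;
}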
public JSONArray searchBook(String bookId, String searchText, File externalFieldsDir, String filter) throws Exception {
    List<SearchResultData> searchResults = null;
    NIOFSDirectory directory = null;
    IndexReader indexReader = null;
    IndexSearcher indexSearcher = null;
    directory = new NIOFSDirectory(new File(externalFieldsDir.getPath() + "/IndexFile", bookId));
    indexReader = IndexReader.open(directory);
    indexSearcher = new IndexSearcher(indexReader);
    Query finalQuery = constructSearchQuery(searchText, filter);
    TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);
    indexSearcher.search(finalQuery, collector);
    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
}
private Query constructSearchQuery(String searchText, String filter) throws ParseException {
    QueryParser contentQueryParser = new QueryParser(LUCENE_36, "content", new StandardAnalyzer(LUCENE_36));
    contentQueryParser.setAllowLeadingWildcard(true);
    contentQueryParser.setLowercaseExpandedTerms(false);
    String wildCardSearchText = "*" + QueryParser.escape(searchText) + "*";
    // Query Parser used.
    Query contentQuery = contentQueryParser.parse(wildCardSearchText);
    return contentQueryParser.parse(wildCardSearchText);
}

我已经看过相关的问题，我的逻辑似乎与其中的做法并没有什么不同

我怀疑这些字段被覆盖了。此外，我还需要中文支持；除了无法搜索两个或更多单词这个问题之外，这段代码对中文是有效的

先说明一点：

看到这样的搜索实现似乎马上有点奇怪。通过所有可用字符串进行线性搜索看起来是一种过于复杂的方法。我不知道您到底需要完成什么，但我怀疑您最好对文本进行适当的分析，而不是对关键字分析的文本进行双通配符，这样会执行得很差，并且在搜索中没有提供太多的灵活性

继续讨论更具体的问题：

您正在使用不同的分析方法多次分析同一字段中的相同内容

Field contentFieldLower = new Field("content", "", YES, NOT_ANALYZED);
document.add(contentFieldLower);
Field contentField = new Field("content", "", YES, ANALYZED);
document.add(contentField);
Field contentFieldNotAnalysed = new Field("content", "", YES, NOT_ANALYZED);
document.add(contentFieldNotAnalysed);

相反，如果您真的需要所有这些分析方法都可用于搜索，您可能应该在不同的字段中为它们编制索引。一起搜索这些是没有意义的，所以它们不应该在同一个字段中

那么你就有了这种模式：

Field contentField = new Field("content", "", YES, ANALYZED);
document.add(contentField);
//Somewhat later
((Field) document.getFieldable("content")).setValue(pageContent);

别这样，这没道理。只需将内容传递到构造函数中，并将其添加到文档中：

Field contentField = new Field("content", pageContent, YES, ANALYZED);
document.add(contentField);

尤其是当您选择继续在同一字段中以多种方式进行分析时，就无法取到其中某一个特定的 Field 实例（getFieldable 始终返回最先添加的那个字段）。

此查询：

String wildCardSearchText = "*" + QueryParser.escape(searchText) + "*";

正如您所提到的，它无法很好地处理多个词，因为它与 QueryParser 的语法相冲突。最终得到的查询字符串是：

*two terms*

，它会被解析为：

field:*two field:terms*

它不会生成任何与关键字字段匹配的内容（大概是这样）。QueryParser根本不能很好地处理这种查询。您需要在此处自行构造通配符查询：

WildcardQuery query  = new WildcardQuery(new Term("field", "*two terms*"));

我似乎不太明白您具体遇到了什么问题。正如您在链接中提到的，输入多个单词时不会返回正确的结果。您在哪个字段上搜索、使用的是哪种查询？请给出一些例子。让我在这里说明我的观察结果：搜索单个单词效果很好，单个中文词和特殊字符也没有问题；但如果搜索两个单词，就得不到任何结果。我会更新上面的代码，补充评论中问到的查询细节。我之所以使用 document.getFieldable，是因为我用同一个方法为“内容”以外的条目创建各种文档。我现在已经改正了，效果很好。谢谢