How do I use a prefix query on a field in Lucene?

I'm trying to use a prefix query in Lucene to implement autocomplete. I've put together a simple test that I think should work, but it doesn't. I'm indexing some simple strings and using KeywordAnalyzer to make sure they aren't tokenized, yet my searches still don't match anything. How should I index and search the field to get prefix matches?

Here is the unit test I'm using. Every method passes except autocomplete and singleTerm.

package com.sample.index;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.HashMap;

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;

public class TestIndexStuff {
    public static final String FIELD_AUTOCOMPLETE = "autocomplete";
    public static final String FIELD_NORMAL = "normal";
    private IndexSearcher searcher;
    private PerFieldAnalyzerWrapper analyzer;

    @Before
    public void init() throws IOException {
        RAMDirectory idx = new RAMDirectory();

        HashMap<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
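        // KeywordAnalyzer keeps each autocomplete value as a single, untokenized term;
        // all other fields fall back to StandardAnalyzer via the PerFieldAnalyzerWrapper.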
        fieldAnalyzers.put(FIELD_AUTOCOMPLETE, new KeywordAnalyzer());
        analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);

        IndexWriter writer = new IndexWriter(idx, config);
        addDocs(writer);
        writer.close();

        searcher = new IndexSearcher(IndexReader.open(idx));
    }

    private void addDocs(IndexWriter writer) throws IOException {
        for (String text : new String[]{"Fred Rogers", "Toni Reed Preckwinkle", "Randy Savage", "Kathryn Janeway", "Madonna", "Fred Savage"}) {
            Document doc = new Document();
            doc.add(new Field(FIELD_NORMAL, text, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field(FIELD_AUTOCOMPLETE, text, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }

    }

    @Test
    public void prefixParser() throws ParseException {
        Query prefixQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
        assertTrue(prefixQuery instanceof PrefixQuery);

        Query normalQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fred");
        assertFalse(normalQuery instanceof PrefixQuery);
    }

    @Test
    public void normal() throws ParseException, IOException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_NORMAL, analyzer).parse("Fred");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(2, topDocs.totalHits);
    }

    @Test
    public void autocomplete() throws IOException, ParseException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(2, topDocs.totalHits);
    }

    @Test
    public void singleTerm() throws ParseException, IOException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Mado*");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(1, topDocs.totalHits);
    }
}

EDIT: Adding the revised code for anyone reading this later, showing the full test after the changes, thanks to @jpountz. Rather than leave things mixed case, I chose to index the values as lowercase. I also added a unit test to make sure a term in the middle of a value doesn't match, since this should only match values that start with the search term.

package com.sample.index;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.HashMap;

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;

public class TestIndexStuff {
    public static final String FIELD_AUTOCOMPLETE = "autocomplete";
    public static final String FIELD_NORMAL = "normal";
    private IndexSearcher searcher;
    private PerFieldAnalyzerWrapper analyzer;

    @Before
    public void init() throws IOException {
        RAMDirectory idx = new RAMDirectory();

        HashMap<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
        fieldAnalyzers.put(FIELD_AUTOCOMPLETE, new KeywordAnalyzer());
        analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);

        IndexWriter writer = new IndexWriter(idx, config);
        addDocs(writer);
        writer.close();

        searcher = new IndexSearcher(IndexReader.open(idx));
    }

    private void addDocs(IndexWriter writer) throws IOException {
        for (String text : new String[]{"Fred Rogers", "Toni Reed Preckwinkle", "Randy Savage", "Kathryn Janeway", "Madonna", "Fred Savage"}) {
            Document doc = new Document();
            doc.add(new Field(FIELD_NORMAL, text, Field.Store.YES, Field.Index.ANALYZED));
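            // The autocomplete value is indexed lowercased so it matches the lowercased
            // terms QueryParser produces for prefix queries.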
            doc.add(new Field(FIELD_AUTOCOMPLETE, text.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }

    }

    @Test
    public void prefixParser() throws ParseException {
        Query prefixQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
        assertTrue(prefixQuery instanceof PrefixQuery);

        Query normalQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fred");
        assertFalse(normalQuery instanceof PrefixQuery);
    }

    @Test
    public void normal() throws ParseException, IOException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_NORMAL, analyzer).parse("Fred");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(2, topDocs.totalHits);
    }

    @Test
    public void autocomplete() throws IOException, ParseException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(2, topDocs.totalHits);
    }

    @Test
    public void beginningOnly() throws ParseException, IOException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("R*");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(1, topDocs.totalHits);
    }

    @Test
    public void singleTerm() throws ParseException, IOException {
        Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Mado*");
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(1, topDocs.totalHits);
    }
}
Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Mado*");
    QueryParser qp = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer);
    qp.setLowercaseExpandedTerms(false);
    Query query = qp.parse("Mado*");