Java Lucene 4.5如何不区分大小写进行搜索_Java_Lucene_Case Insensitive

Java Lucene 4.5如何不区分大小写进行搜索

java lucene

Java Lucene 4.5如何不区分大小写进行搜索,java,lucene,case-insensitive,Java,Lucene,Case Insensitive,我们已经实现了Java Lucene搜索引擎4.5，我尝试搜索内容，即使字段值不区分大小写（例如，如果我搜索名为“Banglore”的城市，我会得到一个结果，但当我搜索名为“Banglore”的城市时，我会得到0个结果）我使用了StandardAnalyzer来分析数据，并使用WildcardQuery来匹配Like条件（如前所述，我尝试了但没有成功）我不确定哪里出了问题。我非常感谢任何关于解决此案例敏感性问题的指导 public SearchHelper { Analyzer an

我们已经实现了基于Java Lucene 4.5的搜索引擎。我希望搜索时不区分字段值的大小写，但目前的搜索是区分大小写的（例如，如果我搜索名为“Banglore”的城市，我会得到一个结果，但当我搜索名为“banglore”的城市时，我会得到0个结果）

我使用了

StandardAnalyzer

来分析数据，并使用

WildcardQuery

来匹配

Like

条件（如前所述，我尝试了但没有成功）

我不确定哪里出了问题。我非常感谢任何关于解决此大小写敏感问题的指导

public SearchHelper
{
    Analyzer analyzer;

    Directory index;
    public IndexSearcher searcher = null;
    public IndexWriter indexWriter = null;
    public QueryParser parser = null;
    private static int hitsPerPage = 100;

    /**
     * @param indexFileLocation
     * @throws IOException
     */
    public SearchHelper(String indexFileLocation) throws IOException
    {
//        this.analyzer =new StandardAnalyzer();
        this.analyzer = new CaseStandardAnalyzer();
//        analyzer = new ThaiAnalyzer();
        this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
    }

    /**
     * @param create
     * @return
     * @throws IOException
     */
    public IndexWriter getIndexWriter(boolean create) throws IOException
    {
        if (indexWriter == null)
        {
            IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer);
            this.indexWriter = new IndexWriter(this.index, iwc);
        }
        return this.indexWriter;
    } //End of getIndexWriter

    /**
     * @throws IOException
     */
    public void closeIndexWriter() throws IOException
    {
        if (this.indexWriter != null)
        {
             this.indexWriter.commit();//optimize(); LUCENE_36
             this.indexWriter.close();
        }
    } //End closeIndexWriter

    /**
     * @param indexFileLocation
     * @throws CorruptIndexException
     * @throws IOException
     */
    public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
    {
//        searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation)));

        IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)));
//        IndexReader.open(this.index);
//        open(getIndexWriter(true), true);
        this.searcher = new IndexSearcher(reader);
    }

    /**
     * @param fieldNames
     * @param fieldValues
     * @return
     * @throws IOException
     * @throws ParseException
     * 
     * <p></p>
     * https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
     */
    public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
    {
        this.analyzer = new StandardAnalyzer();
        int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length;

        BooleanQuery booleanQuery = new BooleanQuery();

        for (int i = 0; i < searchFieldSize; i++)
        {
             Query query1 = searchIndexWithWildcardQuery(fieldNames[i], fieldValues[i]);                
             addQueries(booleanQuery, query1, 2);               
        }

        TopScoreDocCollector collector = null; // Or use by default hitsPerPage instead limitSize

        if (limitSize > 0)
        {
            collector = TopScoreDocCollector.create(limitSize);
        } else {
            collector = TopScoreDocCollector.create(hitsPerPage);
        }

        this.searcher.search(booleanQuery,collector);

        return  collector.topDocs().scoreDocs;
    }

    /**
     * @param whichField
     * @param searchString
     * @return
     * @throws IOException
     * @throws ParseException
     */
    public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
    {
        Term term = addTerm(whichField, "*" + searchString + "*");
        Query query = new WildcardQuery(term);
        return query;
    }

    /**
     * @param whichField
     * @param searchString
     * @return
     */
    public Term addTerm(String whichField, String searchString)
    {
        Term term = new Term(whichField, searchString);
        return term;
    }

    /**
     * @param searchString
     * @param operation
     * @return
     * @throws ParseException
     */
    public Query addConditionOpertaion(String searchString, String operation) throws ParseException
    {
        Query query = null;
        if ("and".equals(operation))
        {
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
        } else if("or".equals(operation)) {
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
        }

        query = parser.parse(searchString);
        return query;
    }

    /**
     * @param booleanQuery <code>BooleanQuery</code>
     * @param q <code>Query</code>
     * @param type <code>int</code> , 1--> Must, 2-->Should, 3 --> Must Not
     */
    public void addQueries(BooleanQuery booleanQuery, Query q, int type)
    {
        switch(type)
        {
            case 1: booleanQuery.add(q, Occur.MUST);
                    break;
            case 2: booleanQuery.add(q, Occur.SHOULD);
                    break;
            default:booleanQuery.add(q, Occur.MUST_NOT);
                    break;
        } //End of switch
    }

    public QueryParser getParser()
    {
        return parser;
    }

    public void setParser(String fieldName)
    {
        this.parser = new QueryParser(fieldName, this.analyzer);
    }

    public void getDefaultByStatus(int status)
    {
        this.analyzer = new StandardAnalyzer();
        this.parser = new QueryParser("status", this.analyzer);
    }

    protected void doClear(File dir,boolean deleteSubDir)
    {
        for (File file: dir.listFiles())
        {
            if (file.isDirectory() && deleteSubDir)
            {
                doClear(file,deleteSubDir);
            }
            file.delete();
        }
    } //End of doClear();

    protected void doClose() throws IOException
    {
        this.searcher.getIndexReader().close();
    }

    public boolean add(Object Obj) throws Exception
    {
        User currentUser = (User)Obj;
        boolean isAdded = false;

        org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
        luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
        luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
        luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
        luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
        luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));

//        addRelatedFields(luceneDoc,city.getStateCode());

        IndexWriter writer = getIndexWriter(false);
        writer.addDocument(luceneDoc);

        closeIndexWriter();

        isAdded = true;
        System.out.println(isAdded);
        return isAdded;
    } // End of add

    public boolean update(Object Obj) throws Exception
    {
        boolean isUpdated = false;
        User currentUser = (User) Obj;

        org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
//        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
        luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
        luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
        luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
        luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
        luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));

//        addRelatedFields(luceneDoc,city.getStateCode());

        IndexWriter writer = getIndexWriter(false);
        writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc); 
        closeIndexWriter();

        isUpdated = true;
        return isUpdated;
    } // End of update

    public boolean delete(Object Obj) throws Exception
    {
        boolean isDeleted = false;
        User currentUser = (User) Obj;      

        Term deleteTerm = new Term("login", currentUser.getLogin());

        IndexWriter writer = getIndexWriter(false);
        writer.deleteDocuments(deleteTerm); // Or use Query
        writer.forceMergeDeletes();
        closeIndexWriter();

        isDeleted = true;

        return isDeleted;
    } // End of delete

    @Override
    public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
    {
        Object obj = null;
        org.apache.lucene.search.ScoreDoc[] hits =  searchSEO(fieldNames,fieldValues,  limit);
        int hitSize = (null == hits) ? 0 : hits.length;

        System.out.println("total:" + hitSize);

        doClose();
        return obj;
    } // End of search

    public void addThreadUser()
    {
        User user = new User();
        addUserPojo(user);    
        add(user);
    }

    public void updateThreadUser()
    {
        User user = new User();
        addUserPojo(user);
        update(user);
    }

    public void deleteThreadUser()
    {
        User user = new User();
        addUserPojo(user);   
        delete(user);
    }

    private void addUserPojo(User user)
    {
        user.setOid(3);
        user.setLogin("senthil");
        user.setFirstName("Semthil");
        user.setLastName("Semthil");
        user.setStatus(1);
        user.setCity("Combiatore");
        user.setEmailId("semthil@xyz.com");
    }

    public void searchUser()
    {
        searchUser(new String[] {"login"}, new String[] {"Se"}, null);
    }

    public static void main(String[] args)
    {
        SearchHelper test = new SearchHelper();
        test.searchUser();
    }
}

按照您引用的帖子的建议使用：

    TokenStream stream = new StandardFilter(Version.LUCENE_CURRENT, tokenizer);
    stream = new LowerCaseFilter(Version.LUCENE_CURRENT, stream);

一个更完整的例子是。

您正在使用

StringField

对数据进行索引，但此字段将绕过analyzer链，始终将术语逐字索引为一个标记，而不考虑您的analyzer。如果要对数据进行分析，并且

StandardAnalyzer

已经进行了小写，则应使用

TextField

。

除此之外，

WildcardQuery

不会分析其术语，因此如果搜索“Banglore”，它将不会与索引中现在已被转换为小写的“banglore”匹配。您必须自己将搜索词转换为小写（或对其使用分析器）。

您可以使用自定义（custom）比较器类

/**
 * {@link FieldComparator} over the string values of a single field. Shorter values sort
 * before longer ones; values of equal length are compared ignoring case.
 */
class CaseIgonreCompare extends FieldComparator<String> {

    /** Name of the field whose values are compared. */
    private final String field;
    /** Values copied into the comparator, indexed by slot. */
    private final String[] values;
    private String bottom;
    private String topValue;
    /** Per-reader values, replaced on every {@link #setNextReader} call. */
    private BinaryDocValues cache;

    /**
     * @param field   field to read the values from
     * @param numHits number of slots to allocate
     */
    public CaseIgonreCompare(String field, int numHits) {
        this.values = new String[numHits];
        this.field = field;
    }

    /** Compares the values held in slots {@code arg0} and {@code arg1}. */
    @Override
    public int compare(int arg0, int arg1) {
        String left = values[arg0];
        String right = values[arg1];
        return compareValues(left, right);
    }

    /** Compares the bottom value with the value of document {@code arg0}. */
    @Override
    public int compareBottom(int arg0) throws IOException {
        String docValue = cache.get(arg0).utf8ToString();
        return compareValues(bottom, docValue);
    }

    /** Compares the top value with the value of document {@code arg0}. */
    @Override
    public int compareTop(int arg0) throws IOException {
        String docValue = cache.get(arg0).utf8ToString();
        return compareValues(topValue, docValue);
    }

    /**
     * Orders by length first; equal-length values are ordered case-insensitively.
     *
     * @return negative, zero or positive as {@code first} sorts before, with or after {@code second}
     */
    public int compareValues(String first, String second) {
        int lengthDelta = first.length() - second.length();
        if (lengthDelta != 0) {
            return lengthDelta;
        }
        return first.compareToIgnoreCase(second);
    }

    /** Stores the value of document {@code arg1} in slot {@code arg0}. */
    @Override
    public void copy(int arg0, int arg1) throws IOException {
        String docValue = cache.get(arg1).utf8ToString();
        values[arg0] = docValue;
    }

    /** Takes the bottom value from slot {@code arg0}. */
    @Override
    public void setBottom(int arg0) {
        this.bottom = values[arg0];
    }

    /** Loads the field's terms for the next reader from the default field cache. */
    @Override
    public FieldComparator<String> setNextReader(AtomicReaderContext arg0)
            throws IOException {
        BinaryDocValues terms = FieldCache.DEFAULT.getTerms(arg0.reader(), field, true);
        this.cache = terms;
        return this;
    }

    @Override
    public void setTopValue(String arg0) {
        this.topValue = arg0;
    }

    /** Returns the value held in slot {@code arg0}. */
    @Override
    public String value(int arg0) {
        return values[arg0];
    }
}

class CaseIgonRecare扩展了FieldComparator{
私有字符串字段；
私有串底；
私有字符串topValue；
私有二进制文件缓存；
私有字符串[]值；
public CaseIgonRecare（字符串字段，整数）{
this.field=字段；
this.values=新字符串[numHits]；
}
@凌驾
公共整数比较（整数arg0，整数arg1）{
返回比较值（值[arg0]，值[arg1]）；
}
@凌驾
public int comparebotom（int arg0）引发IOException{
返回compareValue（底部，cache.get（arg0.utf8ToString（））；
}
@凌驾
public int compareTop（int arg0）引发IOException{
返回compareValue（topValue，cache.get（arg0.utf8ToString（））；
}
公共整数比较值（字符串第一，字符串第二）{
int val=first.length（）-second.length（）；
返回val==0？第一个。CompareTignoreCase（第二个）：val；
};
@凌驾
公共无效副本（int arg0，int arg1）引发IOException{
值[arg0]=cache.get（arg1.utf8ToString（）；
}
@凌驾
公共void setBottom（int arg0）{
this.bottom=值[arg0]；
}
@凌驾
公共字段比较器setNextReader（AtomicReaderContext arg0）
抛出IOException{
this.cache=FieldCache.DEFAULT.getTerms（arg0.reader（），
字段，正确）；
归还这个；
}
@凌驾
公共void setTopValue（字符串arg0）{
this.topValue=arg0；
}
@凌驾
公共字符串值（int arg0）{
返回值[arg0]；
}
}

谢谢。我尝试使用Lucene 5.1.2，但没有找到可供参考的可用示例代码。是否有示例能帮助我实现不区分大小写的搜索？例如：无论我用 peter、Peter 还是 PETER 进行搜索，都应该得到相同的结果。谢谢Knutwalker，所以你建议我：1）使用TextField；2）我没有理解“WildcardQuery不会分析其术语”这一点，您的意思是不需要分析数据吗？@Kiran 如果您的索引中包含“banglore”，而您的用户输入的是“Banglo”，并且您直接用这个字符串创建通配符查询，那么它将不会匹配，因为您提供给通配符查询的术语应当是已经过分析的。对于您的特定问题，调用

.toLowerCase

就足够了，但要想获得更多信息，您需要手动创建令牌流，或者使用QueryParser并让它为您完成。谢谢Knutwalker，我将尝试1）使用文本字段2）将以大写和小写形式写入数据，以便匹配wildcardQuery中的任意一个用户输入