Lucene索引:缺少文档

Lucene索引:缺少文档,lucene,Lucene,我们有一个非常基本的Lucene设置。我们最近注意到有些文档没有写入索引 这是我们创建文档的方式: private void addToDirectory(SpecialDomainObject specialDomainObject) throws IOException { Document document = new Document(); document.add(new TextField("id", String.valueOf(specialDomainO

我们有一个非常基本的Lucene设置。我们最近注意到有些文档没有写入索引

这是我们创建文档的方式:

/**
 * Builds one Lucene document for the given domain object and writes it to the index.
 *
 * <p>The document carries the stored fields {@code id}, {@code name}, {@code tags} and
 * {@code contents}, plus one {@code languageId} field per associated language. It is passed
 * to {@code updateDocument} keyed by the {@code id} term, and the writer is committed
 * immediately afterwards.
 *
 * @param specialDomainObject the object to index
 * @throws IOException if the index writer fails to update or commit
 */
private void addToDirectory(SpecialDomainObject specialDomainObject) throws IOException {
    final Document doc = new Document();

    // Keep the id text in a local: it is both the field value and the update key.
    final String idValue = String.valueOf(specialDomainObject.getId());
    doc.add(new TextField("id", idValue, Field.Store.YES));
    doc.add(new TextField("name", specialDomainObject.getName(), Field.Store.YES));

    final String joinedTags = joinTags(specialDomainObject.getTags());
    doc.add(new TextField("tags", joinedTags, Field.Store.YES));

    final String contents = getContents(specialDomainObject);
    doc.add(new TextField("contents", contents, Field.Store.YES));

    // The same field name is added once per language, so "languageId" is multi-valued.
    for (Language associatedLanguage : getAllAssociatedLanguages(specialDomainObject)) {
        doc.add(new IntField("languageId", associatedLanguage.getId(), Field.Store.YES));
    }

    final Term idTerm = new Term("id", idValue);
    specialDomainObjectIndexWriter.updateDocument(idTerm, doc);
    specialDomainObjectIndexWriter.commit();
}
以下是创建分析器和索引编写器的方法:

<!-- Lucene compatibility version, obtained through the static factory Version.valueOf("LUCENE_46"). -->
<bean id="luceneVersion" class="org.apache.lucene.util.Version" factory-method="valueOf">
    <constructor-arg value="LUCENE_46"/>
</bean>

<!-- StandardAnalyzer constructed with the version bean above; also handed to the writer config below. -->
<bean id="analyzer" class="org.apache.lucene.analysis.standard.StandardAnalyzer">
    <constructor-arg ref="luceneVersion"/>
</bean>

<!--
    IndexWriter bean. First constructor argument is the directory bean
    "specialDomainObjectDirectory" (defined outside this snippet); the second is an inline
    IndexWriterConfig built from the version and analyzer beans.
    NOTE(review): no destroy-method is declared here; confirm the writer is closed elsewhere
    on shutdown.
-->
<bean id="specialDomainObjectIndexWriter" class="org.apache.lucene.index.IndexWriter">
    <constructor-arg ref="specialDomainObjectDirectory" />
    <constructor-arg>
        <bean class="org.apache.lucene.index.IndexWriterConfig">
            <constructor-arg ref="luceneVersion"/>
            <constructor-arg ref="analyzer" />
            <!-- openMode is set to CREATE_OR_APPEND via the config's property setter. -->
            <property name="openMode" value="CREATE_OR_APPEND"/>
        </bean>
    </constructor-arg>
</bean>
createIndex()的实现方式如下:

/**
 * Rebuilds the index: deletes every document from the writer returned by
 * {@code getIndexWriter()}, refills it through {@link #fillIndex()}, logs timing and
 * counts, then commits and force-merges down to a single segment.
 *
 * @throws IOException if deleting, filling, committing or merging the index fails
 */
@Override
public void createIndex() throws IOException {
    logger.trace("Preparing for index generation...");
    IndexWriter indexWriter = getIndexWriter();

    // Use the monotonic clock for elapsed time; wall-clock Date values can jump
    // if the system time is adjusted while the index is being built.
    long startNanos = System.nanoTime();

    logger.trace("Deleting all documents from index...");
    indexWriter.deleteAll();

    logger.trace("Starting index generation...");
    long numberOfProcessedObjects = fillIndex();

    long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;
    // Parameterized like the two messages below, so no string is built when debug is off.
    logger.debug("Index written in {} milliseconds.", elapsedMillis);
    logger.debug("Number of processed objects: {}", numberOfProcessedObjects);
    logger.debug("Number of documents in index: {}", indexWriter.numDocs());

    indexWriter.commit();
    indexWriter.forceMerge(1);
}

/**
 * Reads all {@code SpecialDomainObject}s from the repository page by page and hands each
 * page to {@code addToDirectory}.
 *
 * <p>The first request asks for page 0 with {@code MAXIMUM_PAGE_ELEMENTS} entries; each
 * following request asks for the next page number with the size reported by the current page.
 *
 * @return the total element count reported by the last page that was fetched
 * @throws IOException if adding a page to the index fails
 */
@Override
protected long fillIndex() throws IOException {
    Page<SpecialDomainObject> currentPage =
        specialDomainObjectRepository.findAll(new PageRequest(0, MAXIMUM_PAGE_ELEMENTS));
    addToDirectory(currentPage);

    // Keep fetching and indexing until the repository reports no further page.
    while (currentPage.hasNextPage()) {
        PageRequest nextRequest = new PageRequest(currentPage.getNumber() + 1, currentPage.getSize());
        currentPage = specialDomainObjectRepository.findAll(nextRequest);
        addToDirectory(currentPage);
    }

    return currentPage.getTotalElements();
}
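@Override
public void createIndex() throws IOException {
    logger.trace("Preparing for index generation...");
    IndexWriter indexWriter = getIndexWriter();

    Date start = new Date();

    logger.trace("Deleting all documents from index...");
    indexWriter.deleteAll();

    logger.trace("Starting index generation...");
    long numberOfProcessedObjects = fillIndex();

    logger.debug("Index written in " + (new Date().getTime() - start.getTime()) + " milliseconds.");
    logger.debug("Number of processed objects: {}", numberOfProcessedObjects);
    logger.debug("Number of documents in index: {}", indexWriter.numDocs());

    indexWriter.commit();
    indexWriter.forceMerge(1);
}

@Override
protected long fillIndex() throws IOException {
    Page<SpecialDomainObject> specialDomainObjectsPage = specialDomainObjectRepository.findAll(new PageRequest(0, MAXIMUM_PAGE_ELEMENTS));
    while (true) {
        addToDirectory(specialDomainObjectsPage);
        if (specialDomainObjectsPage.hasNextPage()) {
            specialDomainObjectsPage =
                specialDomainObjectRepository.findAll(new PageRequest(specialDomainObjectsPage.getNumber() + 1, specialDomainObjectsPage.getSize()));
        } else {
            break;
        }
    }
    return specialDomainObjectsPage.getTotalElements();
}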
大约有2000个specialDomainObject实例,大约80个没有写入索引(我们与Luke进行了检查)


是否存在任何可能导致文档丢失的原因?

我们发现了问题:操作系统的默认编码未设置为UTF-8。

我必须问一下:您是否正常(优雅地)关闭了 IndexWriter?

是的,我们关闭了。我们已经找到了问题所在。
/**
 * Rebuilds the index: deletes every document from the writer returned by
 * {@code getIndexWriter()}, refills it through {@link #fillIndex()}, logs timing and
 * counts, then commits and force-merges down to a single segment.
 *
 * @throws IOException if deleting, filling, committing or merging the index fails
 */
@Override
public void createIndex() throws IOException {
    logger.trace("Preparing for index generation...");
    IndexWriter indexWriter = getIndexWriter();

    // Use the monotonic clock for elapsed time; wall-clock Date values can jump
    // if the system time is adjusted while the index is being built.
    long startNanos = System.nanoTime();

    logger.trace("Deleting all documents from index...");
    indexWriter.deleteAll();

    logger.trace("Starting index generation...");
    long numberOfProcessedObjects = fillIndex();

    long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;
    // Parameterized like the two messages below, so no string is built when debug is off.
    logger.debug("Index written in {} milliseconds.", elapsedMillis);
    logger.debug("Number of processed objects: {}", numberOfProcessedObjects);
    logger.debug("Number of documents in index: {}", indexWriter.numDocs());

    indexWriter.commit();
    indexWriter.forceMerge(1);
}

/**
 * Reads all {@code SpecialDomainObject}s from the repository page by page and hands each
 * page to {@code addToDirectory}.
 *
 * <p>The first request asks for page 0 with {@code MAXIMUM_PAGE_ELEMENTS} entries; each
 * following request asks for the next page number with the size reported by the current page.
 *
 * @return the total element count reported by the last page that was fetched
 * @throws IOException if adding a page to the index fails
 */
@Override
protected long fillIndex() throws IOException {
    Page<SpecialDomainObject> currentPage =
        specialDomainObjectRepository.findAll(new PageRequest(0, MAXIMUM_PAGE_ELEMENTS));
    addToDirectory(currentPage);

    // Keep fetching and indexing until the repository reports no further page.
    while (currentPage.hasNextPage()) {
        PageRequest nextRequest = new PageRequest(currentPage.getNumber() + 1, currentPage.getSize());
        currentPage = specialDomainObjectRepository.findAll(nextRequest);
        addToDirectory(currentPage);
    }

    return currentPage.getTotalElements();
}