Java Tika行动手册示例Lucene StandardAnalyzer不工作_Java_Lucene_Apache Tika

Java：《Tika in Action》书中示例的Lucene StandardAnalyzer无法工作

java lucene

Java Tika行动手册示例Lucene StandardAnalyzer不工作,java,lucene,apache-tika,Java,Lucene,Apache Tika,首先，当谈到蒂卡和卢森时，我是一个彻头彻尾的傻瓜。我正在阅读《蒂卡行动》一书，尝试这些例子。第5章给出了该示例： package tikatest01; import java.io.File; import org.apache.tika.Tika; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fie

首先，说到Tika和Lucene，我完全是个新手。我正在阅读《Tika in Action》一书，并尝试书中的示例。第5章给出了如下示例：

package tikatest01;

import java.io.File;
import org.apache.tika.Tika;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;

/**
 * Adds files to a Lucene index, using Tika to extract their text.
 *
 * <p>This version uses the {@code Field(name, value, Store, Index)} constructor.
 */
public class LuceneIndexer {

    /** Extracts plain text from the files being indexed. */
    private final Tika tika;

    /** Receives the documents built by {@link #indexDocument(File)}. */
    private final IndexWriter writer;

    /**
     * Creates an indexer that extracts text with {@code tika} and writes to {@code writer}.
     *
     * @param tika   the Tika facade used to parse files
     * @param writer the index writer that documents are added to
     */
    public LuceneIndexer(Tika tika, IndexWriter writer) {
        this.writer = writer;
        this.tika = tika;
    }

    /**
     * Builds a document with a "filename" field and a "fulltext" field for the
     * given file and adds it to the index writer.
     *
     * @param file the file to parse and index
     * @throws Exception if parsing the file or adding the document fails
     */
    public void indexDocument(File file) throws Exception {
        // The file name is stored (Store.YES); the extracted text is not (Store.NO).
        Field nameField = new Field("filename", file.getName(), Store.YES, Index.ANALYZED);
        Field bodyField = new Field("fulltext", tika.parseToString(file), Store.NO, Index.ANALYZED);

        Document doc = new Document();
        doc.add(nameField);
        doc.add(bodyField);
        writer.addDocument(doc);
    }
}

main方法如下：

package tikatest01;

import java.io.File;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.tika.Tika;

/**
 * Entry point of the example: creates an IndexWriter with a StandardAnalyzer and
 * indexes one PDF file through LuceneIndexer.
 *
 * NOTE(review): this uses a three-argument IndexWriter constructor and
 * Version.LUCENE_30, so it appears to target the Lucene 3.x API - confirm
 * against the Lucene version actually on the classpath.
 */
public class TikaTest01 {

    public static void main(String[] args) throws Exception {

        // The document that will be parsed by Tika and indexed.
        String filename = "C:\\testdoc.pdf";
        File file = new File(filename);

        // NOTE(review): the same File (the PDF itself) is passed to SimpleFSDirectory
        // as the index location - presumably a separate directory was intended; confirm.
        // NOTE(review): MaxFieldLength is not covered by any import in this file.
        IndexWriter writer = new IndexWriter(
            new SimpleFSDirectory(file),
            new StandardAnalyzer(Version.LUCENE_30), 
            MaxFieldLength.UNLIMITED);
        try {
            LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
            indexer.indexDocument(file);
            } 
        finally {
            // Always close the writer, even if indexing throws.
            writer.close();
            }
    }
}

我已经将库tika-app-1.5.jar、lucene-core-4.7.0.jar和lucene-analyzers-common-4.7.0.jar添加到项目中

问题:

在Lucene的当前版本中，Field.Index已被弃用，我应该使用什么来替代？

找不到MaxFieldLength。我是不是缺少某个导入？

对于Lucene 4.7，IndexWriter没有这种构造函数，请查看API文档：

它只列出了带2个参数的构造函数，因此您需要将此示例改写为适配新的Lucene API。对于Lucene 4.7，IndexWriter没有这种构造函数，请查看API文档：

它只显示具有2个参数的构造函数，因此您需要将此示例应用于Lucene 4.7的新Lucene API

package tikatest01;

import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.tika.Tika;

/**
 * Adds files to a Lucene index, using Tika to extract their text.
 *
 * <p>This version builds its fields with {@code TextField}.
 */
public class LuceneIndexer {

    /** Extracts plain text from the files being indexed. */
    private final Tika tika;

    /** Receives the documents built by {@link #indexDocument(File)}. */
    private final IndexWriter writer;

    /**
     * Creates an indexer that extracts text with {@code tika} and writes to {@code writer}.
     *
     * @param tika   the Tika facade used to parse files
     * @param writer the index writer that documents are added to
     */
    public LuceneIndexer(Tika tika, IndexWriter writer) {
        this.writer = writer;
        this.tika = tika;
    }

    /**
     * Builds a document with a "filename" field and a "fulltext" field for the
     * given file and adds it to the index writer.
     *
     * @param file the file to parse and index
     * @throws Exception if parsing the file or adding the document fails
     */
    public void indexDocument(File file) throws Exception {
        // The file name is stored (Store.YES); the extracted text is not (Store.NO).
        TextField nameField = new TextField("filename", file.getName(), Store.YES);
        TextField bodyField = new TextField("fulltext", tika.parseToString(file), Store.NO);

        Document doc = new Document();
        doc.add(nameField);
        doc.add(bodyField);
        writer.addDocument(doc);
    }
}

这是主类的代码：

package tikatest01;

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;

/**
 * Entry point of the example: indexes a single document with Lucene 4.7.
 *
 * <p>The index is written to {@code C:\MyTestDir\}; the document that Tika parses
 * and that gets indexed is {@code C:\testdoc.pdf}.
 */
public class TikaTest01 {

    /**
     * Opens an {@link IndexWriter} on the index directory, indexes one document
     * through {@link LuceneIndexer}, and closes the writer.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or the document cannot be indexed
     */
    public static void main(String[] args) throws Exception {

        // Directory that holds the Lucene index files.
        String dirname = "C:\\MyTestDir\\";
        File dir = new File(dirname);

        // The document to index. This must be a regular file, not the index
        // directory: Tika extracts text from a file, it cannot parse a directory.
        String filename = "C:\\testdoc.pdf";
        File file = new File(filename);

        IndexWriter writer = new IndexWriter(
            new SimpleFSDirectory(dir),
            new IndexWriterConfig(
                Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47)));
        try {
            LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
            indexer.indexDocument(file);
        } finally {
            // Always close the writer, even if indexing throws.
            writer.close();
        }
    }
}

对于Lucene 4.7，索引器的代码如下：

package tikatest01;

import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.tika.Tika;

/**
 * Adds files to a Lucene index, using Tika to extract their text.
 *
 * <p>This version builds its fields with {@code TextField}.
 */
public class LuceneIndexer {

    /** Extracts plain text from the files being indexed. */
    private final Tika tika;

    /** Receives the documents built by {@link #indexDocument(File)}. */
    private final IndexWriter writer;

    /**
     * Creates an indexer that extracts text with {@code tika} and writes to {@code writer}.
     *
     * @param tika   the Tika facade used to parse files
     * @param writer the index writer that documents are added to
     */
    public LuceneIndexer(Tika tika, IndexWriter writer) {
        this.writer = writer;
        this.tika = tika;
    }

    /**
     * Builds a document with a "filename" field and a "fulltext" field for the
     * given file and adds it to the index writer.
     *
     * @param file the file to parse and index
     * @throws Exception if parsing the file or adding the document fails
     */
    public void indexDocument(File file) throws Exception {
        // The file name is stored (Store.YES); the extracted text is not (Store.NO).
        TextField nameField = new TextField("filename", file.getName(), Store.YES);
        TextField bodyField = new TextField("fulltext", tika.parseToString(file), Store.NO);

        Document doc = new Document();
        doc.add(nameField);
        doc.add(bodyField);
        writer.addDocument(doc);
    }
}

这是主类的代码：

package tikatest01;

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;

/**
 * Entry point of the example: indexes a single document with Lucene 4.7.
 *
 * <p>The index is written to {@code C:\MyTestDir\}; the document that Tika parses
 * and that gets indexed is {@code C:\testdoc.pdf}.
 */
public class TikaTest01 {

    /**
     * Opens an {@link IndexWriter} on the index directory, indexes one document
     * through {@link LuceneIndexer}, and closes the writer.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or the document cannot be indexed
     */
    public static void main(String[] args) throws Exception {

        // Directory that holds the Lucene index files.
        String dirname = "C:\\MyTestDir\\";
        File dir = new File(dirname);

        // The document to index. This must be a regular file, not the index
        // directory: Tika extracts text from a file, it cannot parse a directory.
        String filename = "C:\\testdoc.pdf";
        File file = new File(filename);

        IndexWriter writer = new IndexWriter(
            new SimpleFSDirectory(dir),
            new IndexWriterConfig(
                Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47)));
        try {
            LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
            indexer.indexDocument(file);
        } finally {
            // Always close the writer, even if indexing throws.
            writer.close();
        }
    }
}

要么使用Lucene 3.6，要么更全面地学习这些API。更全面地学习API正是我阅读这些书的原因。然而，这些书似乎都是针对Lucene 3.x而不是4.x编写的 :S 好的，我的第二个问题得到了部分解答：我需要将lucene-analyzers-common-4.7.0.jar添加到我的项目中，并导入org.apache.lucene.analysis.standard.StandardAnalyzer。MaxFieldLength的问题仍然存在。我已据此更新了问题。要么使用Lucene 3.6，要么更全面地学习这些API。更全面地学习API正是我阅读这些书的原因。然而，这些书似乎都是针对Lucene 3.x而不是4.x编写的 :S 好的，我的第二个问题得到了部分解答：我需要将lucene-analyzers-common-4.7.0.jar添加到我的项目中，并导入org.apache.lucene.analysis.standard.StandardAnalyzer。MaxFieldLength的问题仍然存在。我已为此更新了问题。