无法在lucene中获取搜索文本_Lucene

无法在lucene中获取搜索文本

lucene

无法在lucene中获取搜索文本,lucene,Lucene,我在此粘贴以下代码 import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document;

我在此粘贴以下代码

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
            import org.apache.lucene.document.Document;
            import org.apache.lucene.document.Field;
            import org.apache.lucene.document.StringField;
            import org.apache.lucene.document.TextField;
            import org.apache.lucene.index.DirectoryReader;
            import org.apache.lucene.index.IndexReader;
            import org.apache.lucene.index.IndexWriter;
            import org.apache.lucene.index.IndexWriterConfig;
            import org.apache.lucene.queryparser.classic.QueryParser;
            import org.apache.lucene.search.IndexSearcher;
            import org.apache.lucene.search.Query;
            import org.apache.lucene.search.ScoreDoc;
            import org.apache.lucene.search.TopScoreDocCollector;
            import org.apache.lucene.store.FSDirectory;
            import org.apache.lucene.util.Version;

            import java.io.*;
            import java.util.ArrayList;

            /**
             * This terminal application creates an Apache Lucene index in a folder, adds files into this index
             * based on the input of the user, and then runs interactive searches against the indexed file contents.
             *
             * <p>The same {@link StandardAnalyzer} is used for indexing and for query parsing, so query terms are
             * normalized the same way as the indexed text. Note that StandardAnalyzer, in its default
             * configuration, drops common English stop words (such as "for", "the", "is"); a query made only of
             * such words is expected to return no hits.
             */
            public class TextFileIndexer {
              /** Analyzer shared by the index writer and the query parser. */
              private static final StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
              /** Writer for the index directory given to the constructor; closed by {@link #closeIndex()}. */
              private final IndexWriter writer;
              /** Files collected by {@link #addFiles(File)} that are waiting to be indexed. */
              private final ArrayList<File> queue = new ArrayList<File>();


              /**
               * Interactive entry point: asks for an index directory, then for files/folders to index
               * (until "q" or end of input), then for search queries (until "q" or end of input).
               *
               * @param args unused
               * @throws IOException when reading from standard input or opening/closing the index fails
               */
              public static void main(String[] args) throws IOException {
                System.out.println("Enter the path where the index will be created: (e.g. /tmp/index or c:/temp/index)");

                BufferedReader br = new BufferedReader(
                        new InputStreamReader(System.in));
                String s = br.readLine();
                if (s == null) {
                  // End of input before any path was given: nothing to index or search.
                  System.out.println("Cannot create index...no index path given");
                  System.exit(-1);
                }
                String indexLocation = s;

                TextFileIndexer indexer = null;
                try {
                  indexer = new TextFileIndexer(s);
                } catch (Exception ex) {
                  System.out.println("Cannot create index..." + ex.getMessage());
                  System.exit(-1);
                }

                //===================================================
                //read input from user until he enters q for quit
                //(readLine() returns null at end of input, which is treated like q)
                //===================================================
                while (s != null && !s.equalsIgnoreCase("q")) {
                  try {
                    System.out.println("Enter the full path to add into the index (q=quit): (e.g. /home/ron/mydir or c:\\Users\\ron\\mydir)");
                    System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
                    s = br.readLine();
                    if (s == null || s.equalsIgnoreCase("q")) {
                      break;
                    }

                    //try to add file into the index
                    indexer.indexFileOrDirectory(s);
                  } catch (Exception e) {
                    System.out.println("Error indexing " + s + " : " + e.getMessage());
                  }
                }

                //===================================================
                //after adding, we always have to call the
                //closeIndex, otherwise the index is not created
                //===================================================
                indexer.closeIndex();

                //=========================================================
                // Now search
                //=========================================================
                IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
                try {
                  IndexSearcher searcher = new IndexSearcher(reader);

                  s = "";
                  while (s != null && !s.equalsIgnoreCase("q")) {
                    try {
                      System.out.println("Enter the search query (q=quit):");
                      s = br.readLine();
                      if (s == null || s.equalsIgnoreCase("q")) {
                        break;
                      }
                      Query q = new QueryParser(Version.LUCENE_44, "contents", analyzer).parse(s);

                      // A collector holds the state of ONE search execution and its topDocs()
                      // must not be called more than once, so a fresh collector is created
                      // for every query instead of being shared across the loop.
                      TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);
                      searcher.search(q, collector);
                      ScoreDoc[] hits = collector.topDocs().scoreDocs;

                      // display results
                      System.out.println("Found " + hits.length + " hits.");
                      for (int i = 0; i < hits.length; ++i) {
                        int docId = hits[i].doc;
                        Document d = searcher.doc(docId);
                        System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
                      }

                    } catch (Exception e) {
                      System.out.println("Error searching " + s + " : " + e.getMessage());
                    }
                  }
                } finally {
                  // Release the index files even if the search loop fails unexpectedly.
                  reader.close();
                }

              }

              /**
               * Constructor
               * @param indexDir the name of the folder in which the index should be created
               * @throws java.io.IOException when exception creating index.
               */
              TextFileIndexer(String indexDir) throws IOException {
                // No open mode is set on the config, so the writer uses the library default
                // (per the Lucene documentation this is CREATE_OR_APPEND: an existing index in
                // this folder is extended rather than overwritten).
                FSDirectory dir = FSDirectory.open(new File(indexDir));

                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);

                writer = new IndexWriter(dir, config);
              }

              /**
               * Indexes a file or directory.
               *
               * <p>Each accepted file becomes exactly one document with three fields: "contents"
               * (the whole file, tokenized by the analyzer, not stored), "path" and "filename"
               * (both stored verbatim, not tokenized). A file that cannot be read or added is
               * reported on standard output and skipped.
               *
               * @param fileName the name of a text file or a folder we wish to add to the index
               * @throws java.io.IOException when the index itself cannot be queried
               */
              public void indexFileOrDirectory(String fileName) throws IOException {
                //===================================================
                //gets the list of files in a folder (if user has submitted
                //the name of a folder) or gets a single file name (is user
                //has submitted only the file name)
                //===================================================
                addFiles(new File(fileName));

                try {
                  int originalNumDocs = writer.numDocs();
                  for (File f : queue) {
                    FileReader fr = null;
                    try {
                      Document doc = new Document();

                      //===================================================
                      // add contents of file
                      //===================================================
                      fr = new FileReader(f);

                      // TextField is tokenized by the analyzer, which is what lets parsed
                      // queries on "contents" match individual words. The reader is consumed
                      // inside addDocument(), so it must stay open until after that call.
                      doc.add(new TextField("contents", fr));
                      doc.add(new StringField("path", f.getPath(), Field.Store.YES));
                      doc.add(new StringField("filename", f.getName(), Field.Store.YES));

                      writer.addDocument(doc);
                      System.out.println("Added: " + f);
                    } catch (Exception e) {
                      System.out.println("Could not add: " + f + " : " + e.getMessage());
                    } finally {
                      // fr is still null when opening the file failed.
                      if (fr != null) {
                        try {
                          fr.close();
                        } catch (IOException ignored) {
                          // Closing a read-only stream failed; the document outcome was
                          // already reported above, so there is nothing left to recover.
                        }
                      }
                    }
                  }

                  int newNumDocs = writer.numDocs();
                  System.out.println("");
                  System.out.println("************************");
                  System.out.println((newNumDocs - originalNumDocs) + " documents added.");
                  System.out.println("************************");
                } finally {
                  // Always empty the queue so a failed run does not re-index its files
                  // on the next call.
                  queue.clear();
                }
              }

              /**
               * Recursively collects indexable files below {@code file} into the queue.
               * Missing paths, unreadable directories and files with other extensions are
               * reported on standard output and skipped.
               *
               * @param file a file or directory to inspect
               */
              private void addFiles(File file) {

                if (!file.exists()) {
                  System.out.println(file + " does not exist.");
                  return;
                }
                if (file.isDirectory()) {
                  // listFiles() returns null when the directory cannot be read.
                  File[] children = file.listFiles();
                  if (children == null) {
                    System.out.println("Could not list " + file);
                    return;
                  }
                  for (File f : children) {
                    addFiles(f);
                  }
                } else {
                  String filename = file.getName().toLowerCase();
                  //===================================================
                  // Only index text files
                  // NOTE(review): .pdf is accepted here but is read as plain text like
                  // every other file; confirm whether that is intended.
                  //===================================================
                  if (filename.endsWith(".htm") || filename.endsWith(".html") ||
                          filename.endsWith(".xml") || filename.endsWith(".txt") || filename.endsWith(".pdf")) {
                    queue.add(file);
                  } else {
                    System.out.println("Skipped " + filename);
                  }
                }
              }

              /**
               * Close the index.
               * @throws java.io.IOException when exception closing
               */
              public void closeIndex() throws IOException {
                writer.close();
              }
            }

默认情况下，在 `StandardAnalyzer` 中，“for”和“i”都是停用词（stop words），因此无法被搜索到。默认停用词的完整列表为：

   "a", "an", "and", "are", "as", "at", "be", "but", "by",
   "for", "if", "in", "into", "is", "it",
   "no", "not", "of", "on", "or", "such",
   "that", "the", "their", "then", "there", "these",
   "they", "this", "to", "was", "will", "with"

似乎还有其他问题在起作用。我不清楚为什么你的 reader（IndexReader）会在第二次查询时被关闭，也不知道输出“fieldName=for”来自哪里。但希望这些信息能帮助你开始调试。

您试过在Luke中调试代码吗？（Lucene索引工具箱）

Luke非常擅长使用不同的分析器执行搜索、检查索引存储、了解文档如何根据搜索进行评分等。它可以帮助消除搜索代码的任何问题，因为它直接作用于索引文件

Luke适用于Lucene的Java和.NET版本

   "a", "an", "and", "are", "as", "at", "be", "but", "by",
   "for", "if", "in", "into", "is", "it",
   "no", "not", "of", "on", "or", "such",
   "that", "the", "their", "then", "there", "these",
   "they", "this", "to", "was", "will", "with"