将Lucene Indexer和Searcher示例更新为Lucene 6.6.0
我已经更新了《Lucene in Action》第二版书中的索引器和搜索器示例。索引器工作正常,但搜索器不工作。我已经索引了一堆txt文件(索引器只筛选txt文件)。当我使用Searcher类搜索一个我确信txt文件中包含的单词时(可以用grep验证),它只找到0个匹配的文档。代码一定有问题。这是文件 pom.xml:将Lucene Indexer和Searcher示例更新为Lucene 6.6.0,lucene,Lucene,我已经更新了《Lucene in Action》第二版书中的索引器和搜索器示例。索引器工作正常,但搜索器不工作。我已经索引了一堆txt文件(索引器只筛选txt文件)。当我使用Searcher类搜索一个我确信txt文件中包含的单词时(可以用grep验证),它只找到0个匹配的文档。代码一定有问题。这是文件 pom.xml: 4.0.0 com.learning lucenebook 1.0-SNAPSHOT org.apache.lucene lucene-core 6.6.0 org.apache.lucene lucene-queryparser
4.0.0
com.learning
lucenebook
1.0-SNAPSHOT
org.apache.lucene
lucene-core
6.6.0
org.apache.lucene
lucene-queryparser
6.6.0
org.apache.maven.plugins
maven-compiler-plugin
1.8
1.8
索引器:
package lia.meetlucene;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;
import java.nio.file.Paths;
// From chapter 1
/**
* Command-line indexer: adds one Lucene document for every accepted file found
* directly inside a data directory (sub-directories are skipped, not descended into).
*
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Indexer {
/**
* Entry point. Expects exactly two arguments, the index directory and the data
* directory, indexes the data directory's .txt files and prints the elapsed time.
*
* @param args args[0] is the index directory, args[1] is the data directory
* @throws IllegalArgumentException if the argument count is not 2
* @throws Exception whatever the constructor, index() or close() throws
*/
public static void main(String[] args) throws Exception {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
+ " <index dir> <data dir>");
}
String indexDir = args[0]; //1
String dataDir = args[1]; //2
long start = System.currentTimeMillis();
Indexer indexer = new Indexer(indexDir);
int numIndexed;
try {
numIndexed = indexer.index(dataDir, new TextFilesFilter());
} finally {
// The writer is closed even when indexing fails part-way through.
indexer.close();
}
long end = System.currentTimeMillis();
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
}
// Writer that every document produced by this instance is added to.
private IndexWriter writer;
/**
* Opens the index directory and creates the writer with a default IndexWriterConfig.
*
* @param indexDir file-system path of the index directory
* @throws IOException if the directory or the writer cannot be opened
*/
public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(Paths.get(indexDir));
writer = new IndexWriter(dir, new IndexWriterConfig()); //3
}
/**
* Closes the underlying IndexWriter.
*
* @throws IOException if the writer fails to close
*/
public void close() throws IOException {
writer.close(); //4
}
/**
* Indexes every regular, visible, readable file directly inside dataDir that the
* filter accepts.
*
* @param dataDir directory whose files are indexed
* @param filter decides which files are indexed; null accepts every file
* @return the value of writer.numDocs() after the loop
* @throws Exception if reading or indexing a file fails
*/
public int index(String dataDir, FileFilter filter)
throws Exception {
// NOTE(review): File.listFiles() returns null when dataDir is not a directory or
// cannot be read, in which case the loop below throws NullPointerException.
File[] files = new File(dataDir).listFiles();
for (File f: files) {
if (!f.isDirectory() &&
!f.isHidden() &&
f.exists() &&
f.canRead() &&
(filter == null || filter.accept(f))) {
indexFile(f);
}
}
// NOTE(review): presumably this counts every document held by the index, not only
// the ones added by this call - confirm against the IndexWriter documentation.
return writer.numDocs(); //5
}
/** Accepts files whose lower-cased name ends with ".txt". */
private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase() //6
.endsWith(".txt"); //6
}
}
/**
* Builds the document for one file with three fields: "contents" (a reader over the
* file), "filename" and "fullpath" (the canonical path).
*
* @param f file to turn into a document
* @return the populated document
* @throws Exception if the file cannot be opened or its canonical path resolved
*/
protected Document getDocument(File f) throws Exception {
Document doc = new Document();
// NOTE(review): the FieldType passed here is freshly constructed and no setter is
// called on it. Presumably a default FieldType carries no index options, so the
// file text would never become searchable - a likely cause of the zero-hit
// searches described in the question. Confirm against the FieldType documentation.
doc.add(new Field("contents", new FileReader(f), new FieldType())); //7
// Shared type for the two name fields: kept as one untokenized value and stored.
FieldType notAnalyzed = new FieldType();
notAnalyzed.setTokenized(false);
notAnalyzed.setStored(true);
doc.add(new Field("filename", f.getName(), notAnalyzed //8
));//8
doc.add(new Field("fullpath", f.getCanonicalPath(), //9
notAnalyzed));//9
return doc;
}
/**
* Prints the file's canonical path, then adds its document to the writer.
*
* @param f file to index
* @throws Exception if building or adding the document fails
*/
private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); //10
}
}
/*
#1 Create index in this directory
#2 Index *.txt files from this directory
#3 Create Lucene IndexWriter
#4 Close IndexWriter
#5 Return number of documents indexed
#6 Index .txt files only, using FileFilter
#7 Index file content
#8 Index file name
#9 Index file full path
#10 Add document to Lucene index
*/
包lia.meetlucene;
/**
*曼宁出版公司版权所有。
*
*根据Apache许可证2.0版(以下简称“许可证”)获得许可;
*除非遵守许可证,否则不得使用此文件。
*您可以通过以下方式获得许可证副本:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*除非适用法律要求或书面同意,软件
*根据许可证进行的分发是按“原样”进行分发的,
*无任何明示或暗示的保证或条件。
*请参阅特定lan的许可证
*/
导入org.apache.lucene.document.FieldType;
导入org.apache.lucene.index.IndexWriter;
导入org.apache.lucene.document.document;
导入org.apache.lucene.document.Field;
导入org.apache.lucene.index.IndexWriterConfig;
导入org.apache.lucene.store.FSDirectory;
导入org.apache.lucene.store.Directory;
导入java.io.File;
导入java.io.FileFilter;
导入java.io.IOException;
导入java.io.FileReader;
导入java.nio.file.path;
//从第一章开始
/**
*这段代码最初是为
*Erik的Lucene intro java.net文章
*/
公共类索引器{
公共静态void main(字符串[]args)引发异常{
如果(参数长度!=2){
抛出新的IllegalArgumentException(“用法:java”+Indexer.class.getName()
+ " ");
}
字符串indexDir=args[0];//1
字符串dataDir=args[1];//2
长启动=System.currentTimeMillis();
索引器Indexer=新索引器(indexDir);
int numIndexed;
试一试{
numIndexed=indexer.index(dataDir,newtextfilesfilter());
}最后{
indexer.close();
}
long end=System.currentTimeMillis();
System.out.println(“索引”+numidexed+“文件已获取”
+(结束-开始)+“毫秒”);
}
私人索引作者;
公共索引器(字符串indexDir)引发IOException{
Directory dir=FSDirectory.open(path.get(indexDir));
writer=newindexwriter(dir,newindexwriterconfig());//3
}
public void close()引发IOException{
writer.close();//4
}
公共int索引(字符串dataDir、文件筛选器)
抛出异常{
File[]files=新文件(dataDir).listFiles();
用于(文件f:文件){
如果(!f.isDirectory()&&
!f.isHidden()&&
f、 存在()&&
f、 canRead()&&
(filter==null | | filter.accept(f))){
索引文件(f);
}
}
返回writer.numDocs();//5
}
私有静态类TextFileFilter实现FileFilter{
公共布尔接受(文件路径){
返回路径.getName().toLowerCase()//6
.endsWith(“.txt”);//6
}
}
受保护文档getDocument(文件f)引发异常{
单据单据=新单据();
添加(新字段(“内容”、新文件读取器(f)、新字段类型());//7
FieldType notAnalyzed=新的FieldType();
未分析。设置标记化(false);
未分析。设置存储(真);
doc.add(新字段(“文件名”,f.getName(),未分析//8
));//8
添加(新字段(“完整路径”,f.getCanonicalPath(),//9
未分析);//9
退货单;
}
私有void索引文件(文件f)引发异常{
System.out.println(“索引”+f.getCanonicalPath());
Document doc=getDocument(f);
writer.addDocument(doc);//10
}
}
/*
#1在此目录中创建索引
#2索引此目录中的*.txt文件
#3创建Lucene IndexWriter
#4闭式索引器
#5返回索引的文档数
#6仅索引.txt文件,使用FileFilter
#7索引文件内容
#8索引文件名
#9索引文件完整路径
#10将文档添加到Lucene索引
*/
以及搜索器:
package lia.meetlucene;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.io.IOException;
import java.nio.file.Paths;
// From chapter 1
/**
* Command-line searcher: parses a query string and prints the stored "fullpath"
* value of the best-matching documents in an existing index.
*
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Searcher {
/**
* Entry point. Expects exactly two arguments and delegates to search().
*
* @param args args[0] is the index directory, args[1] is the query string
* @throws IllegalArgumentException if the argument count is not 2
* @throws IOException if the index cannot be opened or read
* @throws ParseException if the query string cannot be parsed
*/
public static void main(String[] args) throws IllegalArgumentException,
IOException, ParseException {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
+ " <index dir> <query>");
}
String indexDir = args[0]; //1
String q = args[1]; //2
search(indexDir, q);
}
/**
* Runs q against the index in indexDir, writes a hit-count/timing line to System.err
* and one "fullpath" value per returned hit to System.out.
*
* @param indexDir file-system path of the index directory
* @param q query string in QueryParser syntax
* @throws IOException if the index cannot be opened or read
* @throws ParseException if q cannot be parsed
*/
public static void search(String indexDir, String q)
throws IOException, ParseException {
Directory dir = FSDirectory.open(Paths.get(indexDir)); //3
DirectoryReader directoryReader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(directoryReader); //3
// NOTE(review): the parser's field argument is "f". The Indexer shown in this
// document only adds fields named "contents", "filename" and "fullpath", so a
// bare term such as `lucene` is presumably looked up in a field that does not
// exist and matches nothing - confirm against the QueryParser documentation.
QueryParser parser = new QueryParser( // 4
"f", //4
new StandardAnalyzer( )); //4
Query query = parser.parse(q); //4
long start = System.currentTimeMillis();
// At most 10 hits are requested.
TopDocs hits = is.search(query, 10); //5
long end = System.currentTimeMillis();
System.err.println("Found " + hits.totalHits + //6
" document(s) (in " + (end - start) + // 6
" milliseconds) that matched query '" + // 6
q + "':"); // 6
for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc); //7
System.out.println(doc.get("fullpath")); //8
}
// NOTE(review): marker 9 is listed as "Close IndexSearcher", but no statement here
// closes dir or directoryReader.
//9
}
}
/*
#1 Parse provided index directory
#2 Parse provided query string
#3 Open index
#4 Parse query
#5 Search index
#6 Write search stats
#7 Retrieve matching document
#8 Display filename
#9 Close IndexSearcher
*/
包lia.meetlucene;
/**
*曼宁出版公司版权所有。
*
*根据Apache许可证2.0版(以下简称“许可证”)获得许可;
*除非遵守许可证,否则不得使用此文件。
*您可以通过以下方式获得许可证副本:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*除非适用法律要求或书面同意,软件
*根据许可证进行的分发是按“原样”进行分发的,
*无任何明示或暗示的保证或条件。
*请参阅特定lan的许可证
*/
导入org.apache.lucene.document.document;
导入org.apache.lucene.index.DirectoryReader;
导入org.apache.lucene.search.indexsearch;
导入org.apache.lucene.search.Query;
导入org.apache.lucene.search.ScoreDoc;
导入org.apache.lucene.search.TopDocs;
导入org.apache.lucene.store.FSDirectory;
导入org.apache.lucene.store.Directory;
导入org.apache.lucene.queryparser.classic.queryparser;
导入org.apache.lucene.queryparser.classic.ParseException;
导入org.apache.lucene.analysis.standard.StandardAnalyzer;
导入java.io.IOException;
导入java.nio.file.path;
//从第一章开始
/**
*这段代码最初是为
*埃里克氏
package lia.meetlucene;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.io.IOException;
import java.nio.file.Paths;
// From chapter 1
/**
* Command-line searcher: parses a query string and prints the stored "fullpath"
* value of the best-matching documents in an existing index.
*
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Searcher {
/**
* Entry point. Expects exactly two arguments and delegates to search().
*
* @param args args[0] is the index directory, args[1] is the query string
* @throws IllegalArgumentException if the argument count is not 2
* @throws IOException if the index cannot be opened or read
* @throws ParseException if the query string cannot be parsed
*/
public static void main(String[] args) throws IllegalArgumentException,
IOException, ParseException {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
+ " <index dir> <query>");
}
String indexDir = args[0]; //1
String q = args[1]; //2
search(indexDir, q);
}
/**
* Runs q against the index in indexDir, writes a hit-count/timing line to System.err
* and one "fullpath" value per returned hit to System.out.
*
* @param indexDir file-system path of the index directory
* @param q query string in QueryParser syntax
* @throws IOException if the index cannot be opened or read
* @throws ParseException if q cannot be parsed
*/
public static void search(String indexDir, String q)
throws IOException, ParseException {
Directory dir = FSDirectory.open(Paths.get(indexDir)); //3
DirectoryReader directoryReader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(directoryReader); //3
// NOTE(review): the parser's field argument is "f". The Indexer shown in this
// document only adds fields named "contents", "filename" and "fullpath", so a
// bare term such as `lucene` is presumably looked up in a field that does not
// exist and matches nothing - confirm against the QueryParser documentation.
QueryParser parser = new QueryParser( // 4
"f", //4
new StandardAnalyzer( )); //4
Query query = parser.parse(q); //4
long start = System.currentTimeMillis();
// At most 10 hits are requested.
TopDocs hits = is.search(query, 10); //5
long end = System.currentTimeMillis();
System.err.println("Found " + hits.totalHits + //6
" document(s) (in " + (end - start) + // 6
" milliseconds) that matched query '" + // 6
q + "':"); // 6
for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc); //7
System.out.println(doc.get("fullpath")); //8
}
// NOTE(review): marker 9 is listed as "Close IndexSearcher", but no statement here
// closes dir or directoryReader.
//9
}
}
/*
#1 Parse provided index directory
#2 Parse provided query string
#3 Open index
#4 Parse query
#5 Search index
#6 Write search stats
#7 Retrieve matching document
#8 Display filename
#9 Close IndexSearcher
*/
package lia.meetlucene;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;
import java.nio.file.Paths;
// From chapter 1
/**
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Indexer {
public static void main(String[] args) {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
+ " <index dir> <data dir>");
}
String indexDir = args[0]; //1
String dataDir = args[1];//2
Indexer indexer = null;
long start = System.currentTimeMillis();
int numIndexed = 0;
try {
indexer = new Indexer(indexDir);
numIndexed = indexer.index(dataDir, new TextFilesFilter());
} catch(Exception e) {
e.printStackTrace();
} finally {
if (indexer != null)
try {
indexer.close();
} catch (IOException e) {
// ignored
}
}
long end = System.currentTimeMillis();
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
}
private IndexWriter writer;
public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(Paths.get(indexDir));
writer = new IndexWriter(dir, new IndexWriterConfig()); //3
}
public void close() throws IOException {
writer.close(); //4
}
public int index(String dataDir, FileFilter filter)
throws Exception {
File[] files = new File(dataDir).listFiles();
for (File f: files) {
if (!f.isDirectory() &&
!f.isHidden() &&
f.exists() &&
f.canRead() &&
(filter == null || filter.accept(f))) {
indexFile(f);
}
}
return writer.numDocs(); //5
}
private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase() //6
.endsWith(".txt"); //6
}
}
protected Document getDocument(File f) throws Exception {
Document doc = new Document();
doc.add(new TextField("contents", new FileReader(f))); //7
FieldType notAnalyzed = new FieldType();
notAnalyzed.setTokenized(false);
notAnalyzed.setStored(true);
doc.add(new Field("filename", f.getName(), notAnalyzed //8
));//8
doc.add(new Field("fullpath", f.getCanonicalPath(), //9
notAnalyzed));//9
return doc;
}
private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); //10
}
}
/*
#1 Create index in this directory
#2 Index *.txt files from this directory
#3 Create Lucene IndexWriter
#4 Close IndexWriter
#5 Return number of documents indexed
#6 Index .txt files only, using FileFilter
#7 Index file content
#8 Index file name
#9 Index file full path
#10 Add document to Lucene index
*/
package lia.meetlucene;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.io.IOException;
import java.nio.file.Paths;
// From chapter 1
/**
 * Command-line searcher: parses a query string against the {@code contents} field and
 * prints the stored {@code fullpath} value of the best-matching documents.
 *
 * <p>This code was originally written for Erik's Lucene intro java.net article.
 */
public class Searcher {

  /** Maximum number of hits retrieved and printed per search. */
  private static final int MAX_HITS = 10;

  /**
   * Entry point. Expects exactly two arguments and delegates to {@link #search}.
   *
   * @param args {@code args[0]} is the index directory, {@code args[1]} the query string
   * @throws IllegalArgumentException if the argument count is not 2
   * @throws IOException if the index cannot be opened or read
   * @throws ParseException if the query string cannot be parsed
   */
  public static void main(String[] args) throws IllegalArgumentException,
      IOException, ParseException {
    if (args.length != 2) {
      throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
          + " <index dir> <query>");
    }
    String indexDir = args[0]; //1
    String q = args[1];        //2

    search(indexDir, q);
  }

  /**
   * Runs {@code q} against the index in {@code indexDir}, writes a hit-count/timing
   * line to {@code System.err} and one {@code fullpath} value per returned hit to
   * {@code System.out}.
   *
   * <p>The directory, the index reader and the analyzer are released when the method
   * returns, whether it completes normally or throws.
   *
   * @param indexDir file-system path of the index directory
   * @param q query string in QueryParser syntax; bare terms search {@code contents}
   * @throws IOException if the index cannot be opened or read
   * @throws ParseException if {@code q} cannot be parsed
   */
  public static void search(String indexDir, String q)
      throws IOException, ParseException {
    try (Directory dir = FSDirectory.open(Paths.get(indexDir));         //3
         DirectoryReader directoryReader = DirectoryReader.open(dir);   //3
         StandardAnalyzer analyzer = new StandardAnalyzer()) {          //4
      IndexSearcher is = new IndexSearcher(directoryReader);            //3

      QueryParser parser = new QueryParser("contents", analyzer);       //4
      Query query = parser.parse(q);                                    //4

      long start = System.currentTimeMillis();
      TopDocs hits = is.search(query, MAX_HITS);                        //5
      long end = System.currentTimeMillis();

      System.err.println("Found " + hits.totalHits +                    //6
          " document(s) (in " + (end - start) +                         //6
          " milliseconds) that matched query '" +                       //6
          q + "':");                                                    //6

      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);                            //7
        System.out.println(doc.get("fullpath"));                        //8
      }
    }                                                                   //9
  }
}
/*
#1 Parse provided index directory
#2 Parse provided query string
#3 Open index
#4 Parse query
#5 Search index
#6 Write search stats
#7 Retrieve matching document
#8 Display filename
#9 Close IndexSearcher
*/