Indexing ApacheLucene 8.4.1如何获取索引字段和术语列表?
我不熟悉 Apache Lucene,目前使用的是 Apache Lucene 8.4.1。我可以完成 Lucene 的索引和搜索,但不知道如何用 Java 读取并列出(打印)索引内容。如何获取索引字段和词项(term)列表?通过使用从其他 Stack Overflow 文章中获取的以下函数,我能够获得字段列表:
/**
 * Collects the names of all fields reported by the given reader.
 *
 * <p>Every leaf returned by {@code reader.leaves()} is visited and the name of each
 * {@code FieldInfo} it exposes is recorded. A name that shows up in more than one leaf is
 * returned only once; names keep the order in which they were first encountered.
 *
 * <p>No filtering is applied: a field is listed whether or not it is indexed/searchable.
 *
 * @param reader the index reader whose leaves are inspected
 * @return the distinct field names in first-seen order; an empty array when no leaf reports
 *         any field
 */
public static String[] getFieldNames(IndexReader reader) {
    // Fully qualified on purpose: this snippet is not accompanied by an import block, and a
    // LinkedHashSet gives de-duplication while preserving encounter order.
    java.util.Set<String> fieldNames = new java.util.LinkedHashSet<String>();
    // A reader over a single-segment index yields one leaf; multi-segment indexes yield
    // several, each of which reports its own FieldInfos.
    for (LeafReaderContext readerCtx : reader.leaves()) {
        FieldInfos fields = readerCtx.reader().getFieldInfos();
        for (FieldInfo field : fields) {
            fieldNames.add(field.name);
        }
    }
    return fieldNames.toArray(new String[0]);
}
公共静态字符串[]GetFieldName(IndexReader){
List fieldNames=new ArrayList();
//对于只包含一个索引的简单读取器,reader.leaves()应该只返回一个leaveReaderContext
for(LeafReaderContext readerCtx:reader.leaves()){
FieldInfos fields=readerCtx.reader().getFieldInfos();
用于(字段信息字段:字段){
//检查该字段是否已编制索引并可搜索,也许是这样?
fieldNames.add(field.name);
}
}
返回fieldNames.toArray(新字符串[fieldNames.size()]);
}
感谢package com.lucene.ram;
导入java.io.IOException;
导入org.apache.lucene.analysis.Analyzer;
导入org.apache.lucene.analysis.standard.StandardAnalyzer;
导入org.apache.lucene.document.document;
导入org.apache.lucene.document.Field.Store;
导入org.apache.lucene.document.TextField;
导入org.apache.lucene.index.DirectoryReader;
导入org.apache.lucene.index.IndexReader;
导入org.apache.lucene.index.IndexWriter;
导入org.apache.lucene.index.IndexWriterConfig;
导入org.apache.lucene.index.IndexWriterConfig.OpenMode;
导入org.apache.lucene.queryparser.classic.ParseException;
导入org.apache.lucene.queryparser.classic.queryparser;
导入org.apache.lucene.search.indexsearch;
导入org.apache.lucene.search.Query;
导入org.apache.lucene.search.ScoreDoc;
导入org.apache.lucene.search.TopDocs;
导入org.apache.lucene.store.RAMDirectory;
/**
*
*@作者W.P.Roshan
*@gmail.com上的电子邮件sunone5
*
*RAMDirector已弃用,您可以使用
*
*导入org.apache.lucene.index.memory.MemoryIndex;
*
*/
公共类RAMDirectoryExample{
公共目录示例(){
//TODO自动生成的构造函数存根
}
静态void writeIndex(RAMDirectory ramDir,Analyzer){
试一试{
//索引写入器配置
IndexWriterConfig iwc=新的IndexWriterConfig(分析器);
iwc.setOpenMode(OpenMode.CREATE);
//IndexWriter将新索引文件写入目录
IndexWriter writer=新的IndexWriter(ramDir,iwc);
//创建一些具有名称和内容的文档
indexDoc(编写者,“文档1”,“你好世界”);
indexDoc(作家,“文件2”,“快乐世界你好”);
indexDoc(作家,“文件3”,“快乐世界你好”);
indexDoc(编写者,“文档4”,“你好,世界”);
//别忘了关上打字机
writer.close();
}捕获(IOE异常){
//这里有错误
e、 printStackTrace();
}
}
静态void indexDoc(IndexWriter writer、字符串名称、字符串内容)引发IOException{
单据单据=新单据();
添加文档(新的文本字段(“名称”,名称,Store.YES));
添加文档(新的文本字段(“content”,content,Store.YES));
writer.addDocument(doc);
}
静态void searchIndex(RAMDirectory ramDir,Analyzer){
IndexReader=null;
试一试{
//创建阅读器
reader=DirectoryReader.open(ramDir);
//创建索引搜索器
IndexSearcher search=新的IndexSearcher(阅读器);
//生成查询
QueryParser qp=新的QueryParser(“内容”,分析器);
Query=qp.parse(“快乐”);
//搜索索引
TopDocs foundDocs=searcher.search(查询,10);
//找到的文件总数
System.out.println(“总结果::”+foundDocs.totalHits);
//让我们打印找到的文档名称及其内容以及分数
for(ScoreDoc sd:foundDocs.scoreDocs){
文档d=searcher.doc(sd.doc);
System.out.println(“文件编号:“+sd.doc+”::文件名称:“+d.get”(“名称”)
+::内容:“+d.get”(“内容”)+”::分数:“+sd.Score”);
}
System.out.println(“”);
//别忘了关上阅读器
reader.close();
}捕获(IOE异常){
//这里有错误
e、 printStackTrace();
}捕获(解析异常){
//TODO自动生成的捕捉块
e、 printStackTrace();
}
}
静态void readIndex_Get_文档(RAMDirectory ramDir){
IndexReader=null;
试一试{
//创建阅读器
reader=DirectoryReader.open(ramDir);
//创建索引搜索器
IndexSearcher search=新的IndexSearcher(阅读器);
System.out.println(“--------------------------文件列表------------------------------------------”;
int maxDoc=reader.maxDoc();
对于(int i=0;i
package com.lucene.ram;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
/**
 * Minimal end-to-end Lucene demonstration that works entirely against an in-memory
 * {@link RAMDirectory}: it writes four small documents, runs one query against them and then
 * prints the stored {@code name} and {@code content} values of every document.
 *
 * <p>NOTE(review): {@code RAMDirectory} is deprecated. The original author pointed to
 * {@code org.apache.lucene.index.memory.MemoryIndex} as an alternative; confirm against the
 * Lucene documentation which replacement fits before migrating.
 *
 * @author W.P.Roshan (sunone5 at gmail.com)
 */
public class RAMDirectoryExample {

    /** Creates an instance; the class holds no state and only exposes static methods. */
    public RAMDirectoryExample() {
    }

    /**
     * Creates a fresh index in {@code ramDir} containing four sample documents.
     *
     * <p>The writer is opened in {@code OpenMode.CREATE}. Any {@link IOException} is printed
     * to standard error and otherwise ignored.
     *
     * @param ramDir   directory that receives the index files
     * @param analyzer analyzer handed to the {@code IndexWriterConfig}
     */
    static void writeIndex(RAMDirectory ramDir, Analyzer analyzer) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        // try-with-resources closes the writer even when adding a document fails.
        try (IndexWriter writer = new IndexWriter(ramDir, iwc)) {
            indexDoc(writer, "document-1", "hello world");
            indexDoc(writer, "document-2", "hello happy world");
            indexDoc(writer, "document-3", "hello happy world");
            indexDoc(writer, "document-4", "hello hello world");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds one document with a stored {@code name} and a stored {@code content} text field.
     *
     * @param writer  writer the document is added to
     * @param name    value of the {@code name} field
     * @param content value of the {@code content} field
     * @throws IOException if the writer fails to add the document
     */
    static void indexDoc(IndexWriter writer, String name, String content) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("name", name, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        writer.addDocument(doc);
    }

    /**
     * Searches the {@code content} field for {@code "happy"} and prints the hit count
     * followed by doc id, name, content and score of up to ten hits.
     *
     * <p>I/O and query-parse failures are printed to standard error and otherwise ignored.
     *
     * @param ramDir   directory holding the index to search
     * @param analyzer analyzer used by the query parser
     */
    static void searchIndex(RAMDirectory ramDir, Analyzer analyzer) {
        // The reader is closed automatically, including when parsing or searching throws.
        try (IndexReader reader = DirectoryReader.open(ramDir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser("content", analyzer);
            Query query = qp.parse("happy");
            TopDocs foundDocs = searcher.search(query, 10);
            System.out.println("Total Results :: " + foundDocs.totalHits);
            for (ScoreDoc sd : foundDocs.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println("Document Number : " + sd.doc + " :: Document Name : " + d.get("name")
                        + " :: Content : " + d.get("content") + " :: Score : " + sd.score);
            }
            System.out.println("");
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints a header followed by the stored {@code name} value(s) of every document.
     *
     * @param ramDir directory holding the index to read
     */
    static void readIndex_Get_Documents(RAMDirectory ramDir) {
        printStoredValues(ramDir, "name",
                "-----------------------Document List-----------------------");
    }

    /**
     * Prints a blank line, a header and then the stored {@code content} value(s) of every
     * document.
     *
     * <p>NOTE(review): despite the name, this prints the stored field text of each document,
     * not the entries of the index's term dictionary.
     *
     * @param ramDir directory holding the index to read
     */
    static void readIndex_Get_Terms(RAMDirectory ramDir) {
        printStoredValues(ramDir, "content",
                "",
                "--------------------------Term List------------------------");
    }

    /**
     * Opens a reader on {@code ramDir}, prints the given header lines and then every stored
     * value of {@code field} for doc ids {@code 0 .. maxDoc-1}, one value per line.
     *
     * <p>An {@link IOException} is printed to standard error and otherwise ignored. Nothing is
     * printed when the reader cannot be opened.
     */
    private static void printStoredValues(RAMDirectory ramDir, String field, String... headerLines) {
        try (IndexReader reader = DirectoryReader.open(ramDir)) {
            for (String line : headerLines) {
                System.out.println(line);
            }
            // NOTE(review): presumably maxDoc also counts deleted documents; this demo never
            // deletes any, so every id is read - confirm before reusing on a real index.
            int maxDoc = reader.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                for (String value : reader.document(docId).getValues(field)) {
                    System.out.println(value);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the demo: index the sample documents, search them, then list the stored names and
     * contents.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        RAMDirectory ramDir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer();
        writeIndex(ramDir, analyzer);
        searchIndex(ramDir, analyzer);
        readIndex_Get_Documents(ramDir);
        readIndex_Get_Terms(ramDir);
    }
}