Python 如何使用PyLucene从Lucene 8.6.1索引中获取所有令牌的列表?
标签:python, search, lucene, full-text-search, pylucene
我从这里得到了一些提示。我首先按如下方式创建索引:
import lucene
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriterConfig, IndexWriter, DirectoryReader
from org.apache.lucene.store import SimpleFSDirectory
from java.nio.file import Paths
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.util import BytesRefIterator
# Build (or extend) a small on-disk Lucene index holding one sample document,
# then reopen it for reading so its terms can be inspected afterwards.
index_path = "./index"

# Start the embedded JVM before touching any wrapped Lucene class.
lucene.initVM()

analyzer = StandardAnalyzer()
config = IndexWriterConfig(analyzer)
# CREATE_OR_APPEND creates a fresh index when none exists and appends to an
# existing one otherwise.  This replaces the former os.listdir() probe, which
# relied on an `os` module that was never imported, failed when the directory
# was missing, and forced APPEND on any non-empty directory even if it did not
# contain an index.
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)

store = SimpleFSDirectory(Paths.get(index_path))
try:
    writer = IndexWriter(store, config)
    try:
        # One sample document with three stored, analyzed text fields.
        doc = Document()
        doc.add(Field("docid", "1", TextField.TYPE_STORED))
        doc.add(Field("title", "qwe rty", TextField.TYPE_STORED))
        doc.add(Field("description", "uio pas", TextField.TYPE_STORED))
        writer.addDocument(doc)
    finally:
        # Always release the writer, even if adding the document failed.
        writer.close()
finally:
    store.close()

# Reopen the directory read-only; `reader` is left open on purpose because the
# term-enumeration snippets that follow use it.
store = SimpleFSDirectory(Paths.get(index_path))
reader = DirectoryReader.open(store)
然后,我尝试获取索引中一个字段的所有术语,如下所示
import lucene
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriterConfig, IndexWriter, DirectoryReader
from org.apache.lucene.store import SimpleFSDirectory
from java.nio.file import Paths
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.util import BytesRefIterator
# Same setup as the indexing snippet: write one sample document to the on-disk
# index and leave an open `reader` for the term-enumeration attempts below.
index_path = "./index"

# Start the embedded JVM before touching any wrapped Lucene class.
lucene.initVM()

analyzer = StandardAnalyzer()
config = IndexWriterConfig(analyzer)
# Let Lucene decide between creating and appending.  The previous check called
# os.listdir() without importing `os`, raised when the directory was absent,
# and selected APPEND for any non-empty directory whether or not it actually
# contained an index.
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)

store = SimpleFSDirectory(Paths.get(index_path))
try:
    writer = IndexWriter(store, config)
    try:
        # Sample document: three stored, analyzed text fields.
        doc = Document()
        doc.add(Field("docid", "1", TextField.TYPE_STORED))
        doc.add(Field("title", "qwe rty", TextField.TYPE_STORED))
        doc.add(Field("description", "uio pas", TextField.TYPE_STORED))
        writer.addDocument(doc)
    finally:
        # Release the writer regardless of whether indexing succeeded.
        writer.close()
finally:
    store.close()

# Reopen for reading; `reader` stays open because the loops below consume it.
store = SimpleFSDirectory(Paths.get(index_path))
reader = DirectoryReader.open(store)
尝试1:尝试使用(另一个问题中用到的)next() 方法,它似乎是 TermsEnum 所实现的 BytesRefIterator 接口中的方法
# Attempt 1 -- intentionally kept as the failing example that the surrounding
# text and traceback discuss.  For each leaf of the reader, fetch the terms of
# the "title" field and try to step through them by calling next() on the
# TermsEnum, Java-style.
for lrc in reader.leaves():
    terms = lrc.reader().terms('title')
    terms_enum = terms.iterator()
    # This is the call that fails with
    # AttributeError: 'TermsEnum' object has no attribute 'next'
    while terms_enum.next():
        term = terms_enum.term()
        print(term.utf8ToString())
但是,我似乎无法访问next()
方法
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-47-6515079843a0> in <module>
2 terms = lrc.reader().terms('title')
3 terms_enum = terms.iterator()
----> 4 while terms_enum.next():
5 term = terms_enum.term()
6 print(term.utf8ToString())
AttributeError: 'TermsEnum' object has no attribute 'next'
尝试2:改用 Python 内置的 next(terms_enum) 来推进枚举(见下方回溯中的第 4 行)。然而,Python 似乎并不把 TermsEnum 当作迭代器
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-48-d490ad78fb1c> in <module>
2 terms = lrc.reader().terms('title')
3 terms_enum = terms.iterator()
----> 4 while next(terms_enum):
5 term = terms_enum.term()
6 print(term.utf8ToString())
TypeError: 'TermsEnum' object is not an iterator
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-48-d490ad78fb1c> in <module>
2 terms = lrc.reader().terms('title')
3 terms_enum = terms.iterator()
----> 4 while next(terms_enum):
5 term = terms_enum.term()
6 print(term.utf8ToString())
TypeError: 'TermsEnum' object is not an iterator
我知道我的问题可以按照(相关问题中的)建议得到解答。那么我的问题其实是:如何获取 TermsEnum 中的所有词项?我发现下面的方法可行,它来自 test_FieldEnumeration(),该方法位于 Pylucene-8.6.1/test3/ 目录下的测试文件中
# Working approach: view the TermsEnum as a BytesRefIterator via cast_(), which
# a plain Python for-loop can iterate.  Each item is one term; utf8ToString()
# decodes it to text for printing.
for term in BytesRefIterator.cast_(terms_enum):
    print(term.utf8ToString())
如果有人能给出比这更详细的解释,我很乐意采纳其答案