如何在compass(lucene)中指定是否存储字段内容?
我正在尝试弄清楚一个生成 Compass 2.2 索引的遗留应用程序是否存储了字段内容。我可以用 luke.net 打开该索引;据我所知,它没有存储字段,只返回一个 id,这个 id 可能在其他地方用于从数据库中查询记录。请参见 Lucene 的相关问题:如何判断这个 Compass 应用程序是否以相当于 Lucene.NET 中 Field.Store.NO 的方式进行索引?(标签:lucene, lucene.net, compass-lucene)下面是 compass.cfg.xml:
<compass-core-config
xmlns="http://www.opensymphony.com/compass/schema/core-config"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.opensymphony.com/compass/schema/core-config
http://www.opensymphony.com/compass/schema/compass-core-config.xsd">
<!-- Compass core configuration: index location, per-field analyzers, mapped classes and engine settings.
     Nothing in this file sets whether field content is stored; the settings below only select analyzers.
     NOTE(review): storage is presumably declared in the mapping of SearchIndex; verify there. -->
<compass name="default">
<connection>
<!-- index path from a file dataUpdate.properties -->
<file path="/" />
</connection>
<searchEngine>
<!-- Default analyzer is a custom per-field analyzer. Its settings are read by PerFieldAnalyzer.configure:
       PerField-FIELD                  = analyzer class to use for FIELD
       PerFieldConfig-stopwords-FIELD  = "no:" for an empty stop word list, "yes:a,b" for an explicit list;
                                         when absent, the analyzer is built with its no-argument constructor.
     NOTE(review): analyzerClass names package myclass.beans.search, while the PerFieldAnalyzer source
     declares package myclass.tserver.beans.search; confirm which one is deployed. -->
<analyzer name="default" type="CustomAnalyzer" analyzerClass="myclass.beans.search.PerFieldAnalyzer" >
<!-- example :
<setting name="PerField-fieldname" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
<setting name="PerFieldConfig-stopwords-fieldname" value="no:" />
<setting name="PerFieldConfig-stopwords-fieldname" value="yes:aa,bb" />
-->
<!-- Fields analyzed with StandardAnalyzer and an empty stop word list ("no:"). -->
<setting name="PerField-symbol" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
<setting name="PerFieldConfig-stopwords-symbol" value="no:" />
<setting name="PerField-isin" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
<setting name="PerFieldConfig-stopwords-isin" value="no:" />
<!-- Fields analyzed with KeywordAnalyzer; no stop word setting is given for them. -->
<setting name="PerField-tipo_opzione" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-settore_cod" value="org.apache.lucene.analysis.KeywordAnalyzer" />
<setting name="PerField-trend_medio" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-trend_breve" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-trend_lungo" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-tipo_sts_cod" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-valuta" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-sottotipo_tit" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-tabella_rt" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-market" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-cod_segmento" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<setting name="PerField-tipo_tit" value="org.apache.lucene.analysis.KeywordAnalyzer"/>
<!-- StandardAnalyzer with an empty stop word list for the radiocor field. -->
<setting name="PerField-radiocor" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
<setting name="PerFieldConfig-stopwords-radiocor" value="no:" />
</analyzer>
</searchEngine>
<mappings>
<!-- The only mapped class registered in this configuration. -->
<class name="myclass.tserver.beans.search.SearchIndex" />
</mappings>
<settings>
<!-- NOTE(review): the unit of this timeout is not visible here; check the Compass settings reference. -->
<setting name="compass.transaction.lockTimeout" value="180" />
</settings>
</compass>
</compass-core-config>
那个 value="no:" 的意思是不存储该值,还是表示不对该字段使用“停用词”?而例如 value="org.apache.lucene.analysis.standard.StandardAnalyzer" 是否意味着会存储它?
这似乎是它所使用的分析器(Analyzer):
package myclass.tserver.beans.search;
import myclass.tserver.ejb.StubWrapper;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.compass.core.CompassException;
import org.compass.core.config.CompassConfigurable;
import org.compass.core.config.CompassSettings;
/**
 * Per-field analyzer wrapper that is configured from Compass settings.
 *
 * <p>For every setting named {@code PerField-<field>} the setting value is taken as the
 * fully qualified class name of the analyzer to register for {@code <field>}. An optional
 * companion setting {@code PerFieldConfig-stopwords-<field>} controls the stop words passed
 * to that analyzer:
 * <ul>
 *   <li>{@code no:} &ndash; the analyzer is created with an empty stop word array;</li>
 *   <li>{@code yes:a,b,c} &ndash; the analyzer is created with the listed stop words;</li>
 *   <li>absent or any other value &ndash; the analyzer is created with its no-argument
 *       constructor, i.e. with whatever defaults that analyzer defines.</li>
 * </ul>
 *
 * <p>Fields without a {@code PerField-} setting fall back to the {@link StandardAnalyzer}
 * passed to the wrapper in the constructor. This class only selects analyzers; it does not
 * read or set anything related to whether field content is stored in the index.
 */
public class PerFieldAnalyzer extends PerFieldAnalyzerWrapper implements CompassConfigurable {

    /** Prefix of the settings that name the analyzer class of a field. */
    private static final String FIELD_PREFIX = "PerField-";

    /** Prefix of the settings that carry extra per-field configuration. */
    private static final String FIELD_CONFIG_PREFIX = "PerFieldConfig-";

    /** Second part of the stop word setting name, placed after {@link #FIELD_CONFIG_PREFIX}. */
    private static final String STOP_WORDS_PREFIX = "stopwords-";

    /** Not referenced anywhere in this class; kept as in the original source. */
    private static final String NO_STOP_WORDS_PREFIX = "no-stopwords-";

    /** Stop word setting value prefix meaning "use an empty stop word list". */
    private static final String NO_STOP_WORDS_MARKER = "no:";

    /** Stop word setting value prefix introducing a comma separated stop word list. */
    private static final String EXPLICIT_STOP_WORDS_MARKER = "yes:";

    /** Creates the wrapper with a {@link StandardAnalyzer} as the default analyzer. */
    public PerFieldAnalyzer() {
        super(new StandardAnalyzer());
    }

    /**
     * Registers one analyzer per {@code PerField-<field>} setting.
     *
     * @param settings the Compass settings to read the per-field configuration from
     * @throws CompassException if the analyzer of a field cannot be loaded or instantiated;
     *         the underlying failure is attached as the cause
     */
    public void configure(CompassSettings settings) throws CompassException {
        for (Object key : settings.getProperties().keySet()) {
            if (!(key instanceof String)) {
                continue;
            }
            String settingName = (String) key;
            if (!settingName.startsWith(FIELD_PREFIX)) {
                continue;
            }
            String field = settingName.substring(FIELD_PREFIX.length());
            String analyzerClassName = settings.getSetting(settingName);
            if (analyzerClassName == null) {
                continue;
            }
            String[] stopwords =
                    parseStopwords(settings.getSetting(FIELD_CONFIG_PREFIX + STOP_WORDS_PREFIX + field));
            try {
                addAnalyzer(field, getAnalyzer(analyzerClassName, stopwords));
            } catch (Exception e) {
                // The original code created this exception without throwing it, which silently
                // left the field on the default analyzer whenever the configuration was wrong.
                throw new CompassException("Unable to set analyzer for field " + field + " : ", e);
            }
        }
    }

    /**
     * Translates the value of a stop word setting into the array handed to the analyzer.
     *
     * @param stopwordsParameter raw setting value, may be {@code null}
     * @return an empty array for {@code no:}, the trimmed listed words for {@code yes:...},
     *         or {@code null} when the analyzer's own defaults should be used
     */
    private static String[] parseStopwords(String stopwordsParameter) {
        if (stopwordsParameter == null) {
            return null;
        }
        String trimmed = stopwordsParameter.trim();
        String lowered = trimmed.toLowerCase();
        if (lowered.startsWith(NO_STOP_WORDS_MARKER)) {
            return new String[] {};
        }
        if (lowered.startsWith(EXPLICIT_STOP_WORDS_MARKER)) {
            // Cut the marker from the non-lowercased value so the words keep their original case.
            String[] words = trimmed.substring(EXPLICIT_STOP_WORDS_MARKER.length()).split(",");
            for (int i = 0; i < words.length; i++) {
                // Tolerate "yes:aa, bb" by dropping blanks around each word.
                words[i] = words[i].trim();
            }
            return words;
        }
        return null;
    }

    /**
     * Instantiates an analyzer reflectively.
     *
     * @param classname fully qualified name of a class extending {@link Analyzer}
     * @param stopwords stop words for the {@code (String[])} constructor, or {@code null}
     *        to use the no-argument constructor
     * @return the new analyzer instance
     * @throws ClassCastException if {@code classname} does not denote an {@link Analyzer}
     */
    private Analyzer getAnalyzer(String classname, String[] stopwords) throws ClassNotFoundException,
            SecurityException, NoSuchMethodException, IllegalArgumentException, InstantiationException,
            IllegalAccessException, InvocationTargetException {
        // asSubclass checks the type up front instead of relying on an unchecked cast.
        Class<? extends Analyzer> analyzerClass = Class.forName(classname).asSubclass(Analyzer.class);
        if (stopwords == null) {
            Constructor<? extends Analyzer> constructor = analyzerClass.getConstructor();
            return constructor.newInstance();
        }
        Constructor<? extends Analyzer> constructor = analyzerClass.getConstructor(String[].class);
        // Cast to Object so the array is passed as one argument, not spread as varargs.
        return constructor.newInstance((Object) stopwords);
    }
}
package myclass.tserver.beans.search;
import myclass.tserver.ejb.StubWrapper;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.compass.core.CompassException;
import org.compass.core.config.CompassConfigurable;
import org.compass.core.config.CompassSettings;
public class PerFieldAnalyzer extends PerFieldAnalyzerWrapper implements CompassConfigurable {
private static final String FIELD_PREFIX = "PerField-";
private static final String FIELD_CONFIG_PREFIX = "PerFieldConfig-";
private static final String STOP_WORDS_PREFIX = "stopwords-";
private static final String NO_STOP_WORDS_PREFIX = "no-stopwords-";
public PerFieldAnalyzer() {
super(new StandardAnalyzer());
}
public void configure(CompassSettings settings) throws CompassException {
for (Object obj : settings.getProperties().keySet()) {
if (obj != null && obj instanceof String && ((String) obj).startsWith(FIELD_PREFIX)) {
String field = ((String) obj).substring(FIELD_PREFIX.length());
String value = settings.getSetting((String) obj);
if (value != null) {
String stopwordsParameter = settings.getSetting(FIELD_CONFIG_PREFIX + STOP_WORDS_PREFIX + field);
String[] stopwords = null;
if (stopwordsParameter != null) {
if (stopwordsParameter.trim().toLowerCase().startsWith("no:"))
// no stopwords
stopwords = new String[] {};
else if (stopwordsParameter.trim().toLowerCase().startsWith("yes:"))
// stopwords
stopwords = stopwordsParameter.trim().substring(4).split(",");
} else
// stopwords di default dello StandardAnalyzer
stopwords = null;
try {
Analyzer analyzer = getAnalyzer(value, stopwords);
addAnalyzer(field, analyzer);
} catch (Exception e) {
new CompassException("Unable to set analyzer for field " + field + " : ", e);
}
}
}
}
}
private Analyzer getAnalyzer(String classname, String[] stopwords) throws ClassNotFoundException, SecurityException,
NoSuchMethodException, IllegalArgumentException, InstantiationException, IllegalAccessException,
InvocationTargetException {
Class<Analyzer> myclass = (Class<Analyzer>) Class.forName(classname);
if (stopwords == null) {
Constructor<Analyzer> myConstructor = myclass.getConstructor();
return (Analyzer) myConstructor.newInstance();
} else {
Constructor<Analyzer> myConstructor = myclass.getConstructor(String[].class);
return (Analyzer) myConstructor.newInstance((Object)stopwords);
}
}
}
了解lucene文档存储哪些字段的最简单方法是通过lucene打开索引并读入文档,然后查看文档的字段列表。已编制索引但未存储的字段不会显示在文档的字段列表中
下面是我为您编写的Lucene.NET4.8中的一个示例,希望它能让您很好地了解如何检查为文档存储的字段。如果您使用的是Java而不是C#,那么您的语法当然会有所不同,并且您将使用的是旧版本的Lucene。但这段代码有望让您在这方面走得更远
在本例中,添加了两个文档,每个文档都有三个字段。这三个字段都被索引,但其中只有两个被存储。我在代码中添加了一条注释,您可以在该处看到每个文档存储了哪些字段。在本例中,d.Fields 列表中每个文档只有两个字段,因为只存储了两个字段。
// Demonstrates how to tell which fields of a document are stored: two documents are indexed
// with three fields each, only two of which use Field.Store.YES. When a document is read
// back, its Fields list contains the stored fields only.
[Fact]
public void StoreFieldsList() {
    using (Directory indexDir = new RAMDirectory()) {
        Analyzer standardAnalyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
        IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, standardAnalyzer);

        using (IndexWriter writer = new IndexWriter(indexDir, indexConfig)) {
            Document doc = new Document();
            doc.Add(new StringField("examplePrimaryKey", "001", Field.Store.YES));
            doc.Add(new TextField("exampleField", "Unique gifts are great gifts.", Field.Store.YES));
            doc.Add(new TextField("notStoredField", "Some text to index only.", Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("examplePrimaryKey", "002", Field.Store.YES));
            doc.Add(new TextField("exampleField", "Everyone is gifted.", Field.Store.YES));
            doc.Add(new TextField("notStoredField", "Some text to index only. Two.", Field.Store.NO));
            // Added once only: the original called AddDocument twice here and indexed
            // document 002 a second time.
            writer.AddDocument(doc);

            writer.Commit();

            using (DirectoryReader reader = writer.GetReader(applyAllDeletes: true)) {
                // Nothing is deleted in this example, so document ids run from 0 to NumDocs - 1.
                for (int i = 0; i < reader.NumDocs; i++) {
                    Document d = reader.Document(i);
                    for (int j = 0; j < d.Fields.Count; j++) {
                        IIndexableField field = d.Fields[j];
                        // Every field listed here is a stored field of this document;
                        // the index-only field must not show up.
                        Assert.NotEqual("notStoredField", field.Name);
                    }
                }
            }
        }
    }
}
[事实]
public void StoreFieldsList(){
目录indexDir=new-RAMDirectory();
Analyzer standardAnalyzer=新的standardAnalyzer(LuceneVersion.LUCENE_48);
IndexWriterConfig indexConfig=新的IndexWriterConfig(LuceneVersion.LUCENE_48,standardAnalyzer);
IndexWriter writer=新的IndexWriter(indexDir,indexConfig);
文件d