将基于单词的 Solr 搜索转换为 edge n-gram 搜索
我的 Solr 能按完整单词返回结果,但只输入单词的前几个字符时不返回任何结果。示例:搜索“This”的结果为“thisabook”,但搜索“Th”的结果为空。有人能帮我配置 edge n-gram 搜索吗?(标签:solr, solr-schema)这是我的 schema:
<?xml version="1.0" encoding="UTF-8"?>
<schema>
  <types>
    <!-- Numeric type used by the _version_ and weight fields. -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Autocomplete type, intended for use together with a suggester
         component. Index side: the whole value is kept as a single token
         (KeywordTokenizer), lowercased, then expanded into its leading
         prefixes of 1 to 25 characters. Query side: the input is only
         lowercased and kept whole, so it is matched against those prefixes. -->
    <fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="25"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Word-based text: split on whitespace, split and catenate word and
         number parts, then lowercase. The same chain is applied at index
         and query time. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Whole-value, lowercased type. No field in this schema refers to it. -->
    <fieldType name="text_auto" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>

    <!-- Document fields; all of them use the word-based "text" type. -->
    <field name="id" type="text" indexed="true" stored="true" multiValued="false" required="true"/>
    <field name="name" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="author" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="description" type="text" indexed="true" stored="true" multiValued="false"/>

    <!-- Prefix-searchable catch-all, filled by the copyField rules below. -->
    <field name="name_auto"
           type="edgytext"
           indexed="true"
           stored="true"
           multiValued="true"
           omitNorms="true"
           omitTermFreqAndPositions="true"/>

    <field name="weight" type="long" indexed="true" stored="true" multiValued="false"/>

    <!-- id, author and name are all copied into the autocomplete field. -->
    <copyField source="id" dest="name_auto"/>
    <copyField source="author" dest="name_auto"/>
    <copyField source="name" dest="name_auto"/>
  </fields>

  <uniqueKey>id</uniqueKey>
</schema>
身份证件
这是我的solrconfig文件:
(原文中的 XML 标签已丢失,以下仅为各配置项的值)
suggest
org.apache.solr.spelling.suggest.Suggester
org.apache.solr.spelling.suggest.tst.TSTLookup
name
dict.txt
...
none
edismax
10
id, author, name, description, weight, name
name_auto
weight desc
json
AND
true
1
true
0.7
true
suggest
我是 Solr 新手,任何帮助我都将不胜感激。:)

回答:我终于让 edge n-gram 正常工作了。下面是我现在的 schema:
<?xml version="1.0" encoding="UTF-8"?>
<schema>
  <types>
    <!-- Numeric type used by the _version_ and weight fields. -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Autocomplete type, intended for use together with a suggester
         component. Unlike a KeywordTokenizer-based chain, this one tokenizes
         the value into words first, so a prefix of ANY word can match, not
         only a prefix of the whole value. -->
    <fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <!-- Character mapping driven by mapping-ISOLatin1Accent.txt. The name
             must not contain spaces; it has to match the file in the core's
             conf directory exactly. -->
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Emit the leading prefixes (1 to 20 characters) of every token.
             NOTE(review): the side attribute is believed to be rejected by
             later Solr releases, where front is the only direction; confirm
             against the Solr version in use before upgrading. -->
        <filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1" side="front"/>
        <!-- Strip every character that is not a word character, digit, "*",
             one of the listed Nordic letters, or a space. The pattern must
             start at the bracket: with a leading space it could never match
             a token, because tokens contain no spaces. -->
        <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
      </analyzer>
      <analyzer type="query">
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- No n-gram expansion at query time: the typed text is compared
             as-is against the prefixes stored in the index. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="0"
                generateNumberParts="0"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
        <!-- Truncate query tokens to 20 characters, the index-side
             maxGramSize; longer terms could never match a stored prefix. -->
        <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?" replacement="$1" replace="all"/>
      </analyzer>
    </fieldType>

    <!-- Word-based text: split on whitespace, split and catenate word and
         number parts, then lowercase. Same chain at index and query time. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Whole-value, lowercased type. No field in this schema refers to it. -->
    <fieldType name="text_auto" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>

    <!-- NOTE(review): id is the uniqueKey but uses the tokenized "text" type;
         an untokenized string type is the usual choice for a uniqueKey.
         Left unchanged here; confirm before altering existing indexes. -->
    <field name="id" type="text" indexed="true" stored="true" multiValued="false" required="true"/>

    <!-- name is prefix-searchable directly and is also copied to name_auto. -->
    <field name="name" type="edgytext" indexed="true" stored="true" multiValued="false"/>
    <field name="author" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="description" type="text" indexed="true" stored="true" multiValued="false"/>

    <!-- Prefix-searchable catch-all, filled by the copyField rules below. -->
    <field name="name_auto"
           type="edgytext"
           indexed="true"
           stored="true"
           multiValued="true"
           omitNorms="true"
           omitTermFreqAndPositions="true"/>

    <field name="weight" type="long" indexed="true" stored="true" multiValued="false"/>

    <copyField source="author" dest="name_auto"/>
    <copyField source="name" dest="name_auto"/>
  </fields>

  <uniqueKey>id</uniqueKey>
</schema>
身份证件
solrconfig 与上面粘贴的相同。这样配置后,edge n-gram 搜索工作得很好。
请注意:无论何时修改 schema 文件,都要先删除旧数据,然后重新建立索引。

另一个回答:对于喜欢使用 Schema API 的人——下面的做法对我有效:
# Register an edge n-gram field type plus a matching dynamic field through the
# Solr Schema API (the target collection is named in the URL on the last line).
#
# Index side: the whole value is kept as one token, lowercased, then expanded
# into its leading prefixes of 1 to 25 characters. Query side: the input is
# only lowercased, so what the user typed is matched against those prefixes.
#
# The n-gram step must be a *filter* factory (EdgeNGramFilterFactory); a
# tokenizer factory is not valid inside the "filters" array.
# Any field whose name ends in "_eg" picks up this type via the dynamic field.
curl -X POST -H 'Content-type:application/json' --data-binary '
{
  "add-field-type": {
    "name": "edgeGramText",
    "class": "solr.TextField",
    "positionIncrementGap": "100",
    "indexAnalyzer": {
      "tokenizer": {
        "class": "solr.KeywordTokenizerFactory"
      },
      "filters": [
        {
          "class": "solr.LowerCaseFilterFactory"
        },
        {
          "class": "solr.EdgeNGramFilterFactory",
          "maxGramSize": "25",
          "minGramSize": "1"
        }
      ]
    },
    "queryAnalyzer": {
      "tokenizer": {
        "class": "solr.KeywordTokenizerFactory"
      },
      "filters": [
        {
          "class": "solr.LowerCaseFilterFactory"
        }
      ]
    }
  },
  "add-dynamic-field": {
    "name": "*_eg",
    "type": "edgeGramText",
    "indexed": true,
    "stored": true
  }
}' http://localhost:8983/solr/collectionName/schema
我还把它添加成了动态字段,这样就不必为每一个新增的 edgeGram 字段修改 schema,只需确保字段名以后缀“_eg”结尾即可。
希望这对大多数使用 SolrCloud 的人有所帮助。

评论:实际的 Solr 查询是什么样子的?把 suggest 组件的配置也贴出来会很有帮助 :-)
评论:已补充。仅供参考——这个配置现在可以工作了,但和我预期的不完全一样。例如,搜索“So”会返回“Something erre”,但搜索“erre”或“Something W”时就不起作用。
# Register an edge n-gram field type plus a matching dynamic field through the
# Solr Schema API (the target collection is named in the URL on the last line).
#
# Index side: the whole value is kept as one token, lowercased, then expanded
# into its leading prefixes of 1 to 25 characters. Query side: the input is
# only lowercased, so what the user typed is matched against those prefixes.
#
# The n-gram step must be a *filter* factory (EdgeNGramFilterFactory); a
# tokenizer factory is not valid inside the "filters" array.
# Any field whose name ends in "_eg" picks up this type via the dynamic field.
curl -X POST -H 'Content-type:application/json' --data-binary '
{
  "add-field-type": {
    "name": "edgeGramText",
    "class": "solr.TextField",
    "positionIncrementGap": "100",
    "indexAnalyzer": {
      "tokenizer": {
        "class": "solr.KeywordTokenizerFactory"
      },
      "filters": [
        {
          "class": "solr.LowerCaseFilterFactory"
        },
        {
          "class": "solr.EdgeNGramFilterFactory",
          "maxGramSize": "25",
          "minGramSize": "1"
        }
      ]
    },
    "queryAnalyzer": {
      "tokenizer": {
        "class": "solr.KeywordTokenizerFactory"
      },
      "filters": [
        {
          "class": "solr.LowerCaseFilterFactory"
        }
      ]
    }
  },
  "add-dynamic-field": {
    "name": "*_eg",
    "type": "edgeGramText",
    "indexed": true,
    "stored": true
  }
}' http://localhost:8983/solr/collectionName/schema