Search 在 Solr 和 Sunspot 中忽略重音
如果我搜索“agua”,搜索不会忽略重音(á,ã,ç),因此没有返回结果 #城市数据库(id、名称、uf、省id)Search 在 Solr 和 Sunspot 中忽略重音,search,solr,ruby-on-rails-4,sunspot-rails,Search,Solr,Ruby On Rails 4,Sunspot Rails,如果我搜索“agua”,搜索不会忽略重音(á,ã,ç),因此没有返回结果 #城市数据库(id、名称、uf、省id) 1 Águas Clara PR 3 2 águas PR 4 3 Áraguaia PR 3 #schema.xml <fieldType name="text" class="solr.TextField" omitNorms="false"> <analyzer type="index"> <tokenizer class="solr.Standa
1 Águas Clara PR 3
2 águas PR 4
3 Áraguaia PR 3
#schema.xml
<fieldType name="text" class="solr.TextField" omitNorms="false">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
尝试运行字段分析,查看同一个词在查询时和索引时分别是如何被处理的。可以访问下面的地址(请根据实际情况调整主机名、核心名和字段名):
http://localhost:8983/solr/core1/analysis/field?wt=json&analysis.showmatch=true&analysis.fieldvalue=%C3%A1guas&analysis.query=%C3%A1guas&analysis.fieldname=name
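结果会显示查询词在分析链的每一个步骤中是如何被处理的。如果输出的分析链中没有 ASCIIFoldingFilter(如下例所示,它会把“état”折叠为“etat”),请在 schema.xml 中该字段类型的 index 和 query 两个分析器里都加入 <filter class="solr.ASCIIFoldingFilterFactory"/>,然后重建索引。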
{
responseHeader:{
status:0,
QTime:2
},
analysis:{
field_types:{
},
field_names:{
Noms:{
index:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
]
],
query:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.synonym.SynonymFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1,
type:"<ALPHANUM>",
start:0,
end:4,
position:1,
positionHistory:[
1,
1,
1
]
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
]
]
}
}
}
}
{
responseHeader:{
status:0,
QTime:2
},
analysis:{
field_types:{
},
field_names:{
Noms:{
index:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
]
],
query:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.synonym.SynonymFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1,
type:"<ALPHANUM>",
start:0,
end:4,
position:1,
positionHistory:[
1,
1,
1
]
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
]
]
}
}
}
}
尝试运行分析,以查看查询vs索引时发生的情况
尝试这样做,调整主机名、核心名和字段名:
http://localhost:8983/solr/core1/analysis/field?wt=json&analysis.showmatch=true&analysis.fieldvalue=%C3%A1guas&analysis.query=%C3%A1guas&analysis.fieldname=name
结果将显示在分析的所有步骤中如何处理查询项
{
responseHeader:{
status:0,
QTime:2
},
analysis:{
field_types:{
},
field_names:{
Noms:{
index:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
match:true,
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
]
],
query:[
"org.apache.lucene.analysis.standard.StandardTokenizer",
[
{
text:"état",
raw_bytes:"[c3 a9 74 61 74]",
start:0,
end:4,
type:"<ALPHANUM>",
position:1,
positionHistory:[
1
]
}
],
"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>"
}
],
"org.apache.lucene.analysis.synonym.SynonymFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1,
type:"<ALPHANUM>",
start:0,
end:4,
position:1,
positionHistory:[
1,
1,
1
]
}
],
"org.apache.lucene.analysis.core.StopFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
],
"org.apache.lucene.analysis.core.LowerCaseFilter",
[
{
text:"etat",
raw_bytes:"[65 74 61 74]",
position:1,
positionHistory:[
1,
1,
1,
1,
1
],
start:0,
end:4,
type:"<ALPHANUM>",
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength:1
}
]
]
}
}
}
}
{
负责人:{
状态:0,
QTime:2
},
分析:{
字段类型