NEST 不支持 pattern replace char filter 的解决方法
NEST 似乎不支持 Elasticsearch 的 pattern replace char filter,我已为此在项目仓库上创建了 issue。我的大部分索引工作正常,所以我想继续使用 NEST。在索引配置过程中,有没有办法通过手动注入一些 JSON 来解决这个问题?我是 NEST 的新手,不确定这是否可行。
具体来说,我希望在文本进入自定义分析器之前,用 pattern replace char filter 从街道地址中删除单元号(例如 “#205 - 1260 Broadway” 变成 “1260 Broadway”)。由于使用了自定义分析器,我相信必须用字符过滤器来完成这项工作。我当前的配置如下所示:
// Index creation for "geocoding": an index-time analyzer (with synonyms) and a
// search-time analyzer that should strip unit numbers via a char filter.
// Fix vs. the posted snippet: the statement was never terminated — ");" added.
elasticClient.CreateIndex("geocoding", c => c
    .Analysis(ad => ad
        .Analyzers(ab => ab
            // Index-time analyzer: whitespace tokenizer + lowercase + synonyms.
            .Add("address-index", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase", "synonym" }
            })
            // Search-time analyzer: references the "drop-unit" char filter below.
            .Add("address-search", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase" },
                CharFilter = new List<string>() { "drop-unit" }
            })
        )
        .CharFilters(cfb => cfb
            // Placeholder: NEST (at the time) exposed no pattern_replace char
            // filter type to register here — this is the gap being asked about.
            .Add("drop-unit", new CharFilter()) //missing char filter here
        )
        .TokenFilters(tfb => tfb
            .Add("synonym", new SynonymTokenFilter()
            {
                Expand = true,
                SynonymsPath = "analysis/synonym.txt"
            })
        )
    )
);
elasticClient.CreateIndex("geocoding", c => c
    .Analysis(ad => ad
        .Analyzers(ab => ab
            .Add("address-index", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase", "synonym" }
            })
            .Add("address-search", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase" },
                CharFilter = new List<string>() { "drop-unit" }
            })
        )
        .CharFilters(cfb => cfb
            .Add("drop-unit", new CharFilter()) // 此处缺少 char filter
        )
        .TokenFilters(tfb => tfb
            .Add("synonym", new SynonymTokenFilter()
            {
                Expand = true,
                SynonymsPath = "analysis/synonym.txt"
            })
        )
    )
);
更新:
自 2014 年 5 月起,NEST 已支持 pattern replace char filter。
在此之前的解决方法:不在索引创建时使用 fluent 语法,而是使用 Settings.Add 方法,以更手动的方式向 FluentDictionary 添加设置,从而完全控制传入的设置项。官方文档中展示了一个示例。我当初采用这种方法的原因与你的情况非常相似。
你的配置将类似于以下内容:
// Workaround (pre-May-2014 NEST): define analyzers and the char filter through
// raw settings keys instead of the fluent analysis API.
// Fixes vs. the posted snippet: removed the stray trailing dots after "c => c"
// and "s => s" (syntax errors) and corrected the misspelled "anaylsis" key.
elasticClient.CreateIndex("geocoding", c => c
    .Settings(s => s
        .Add("analysis.analyzer.address-index.type", "custom")
        .Add("analysis.analyzer.address-index.tokenizer", "whitespace")
        .Add("analysis.analyzer.address-index.filter.0", "lowercase")
        .Add("analysis.analyzer.address-index.filter.1", "synonym")
        .Add("analysis.analyzer.address-search.type", "custom")
        .Add("analysis.analyzer.address-search.tokenizer", "whitespace")
        .Add("analysis.analyzer.address-search.filter.0", "lowercase")
        .Add("analysis.analyzer.address-search.char_filter.0", "drop-unit")
        .Add("analysis.char_filter.drop-unit.type", "mapping")
        .Add("analysis.char_filter.drop-unit.mappings.0", "<mapping1>")
        .Add("analysis.char_filter.drop-unit.mappings.1", "<mapping2>")
        // ...
    )
);
elasticClient.CreateIndex("geocoding", c => c
    .Settings(s => s
        .Add("analysis.analyzer.address-index.type", "custom")
        .Add("analysis.analyzer.address-index.tokenizer", "whitespace")
        .Add("analysis.analyzer.address-index.filter.0", "lowercase")
        .Add("analysis.analyzer.address-index.filter.1", "synonym")
        .Add("analysis.analyzer.address-search.type", "custom")
        .Add("analysis.analyzer.address-search.tokenizer", "whitespace")
        .Add("analysis.analyzer.address-search.filter.0", "lowercase")
        .Add("analysis.analyzer.address-search.char_filter.0", "drop-unit")
        .Add("analysis.char_filter.drop-unit.type", "mapping")
        .Add("analysis.char_filter.drop-unit.mappings.0", "<mapping1>")
        .Add("analysis.char_filter.drop-unit.mappings.1", "<mapping2>")
        // ...
    )
);
您需要将上面的 &lt;mapping1&gt; 和 &lt;mapping2&gt; 替换为您想要使用的实际字符过滤器映射。请注意,我以前没有使用过字符过滤器,因此这些设置值可能不完全准确,但应该能让您朝着正确的方向前进。
为了进一步补充 Paige 非常有用的答案:看起来可以把 fluent 语法和手动的 Settings.Add 方法结合起来。以下方法对我有效:
// Combined approach: raw Settings.Add entries define the pattern_replace char
// filter, while the fluent API defines the analyzers that reference it.
// Fix vs. the posted snippet: the statement was never terminated — ");" added.
elasticClient.CreateIndex("geocoding", c => c
    .Settings(s => s
        // pattern_replace filter that strips a leading unit number like "#205 - ".
        .Add("analysis.char_filter.drop_unit.type", "pattern_replace")
        .Add("analysis.char_filter.drop_unit.pattern", @"#\d+\s-\s")
        .Add("analysis.char_filter.drop_unit.replacement", "")
    )
    .Analysis(ad => ad
        .Analyzers(ab => ab
            .Add("address_index", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase", "synonym" }
            })
            // Search-time analyzer wires in the char filter declared in Settings.
            .Add("address_search", new CustomAnalyzer()
            {
                CharFilter = new List<string> { "drop_unit" },
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase" }
            })
        )
        .TokenFilters(tfb => tfb
            .Add("synonym", new SynonymTokenFilter()
            {
                Expand = true,
                SynonymsPath = "analysis/synonym.txt"
            })
        )
    )
);
elasticClient.CreateIndex("geocoding", c => c
    .Settings(s => s
        .Add("analysis.char_filter.drop_unit.type", "pattern_replace")
        .Add("analysis.char_filter.drop_unit.pattern", @"#\d+\s-\s")
        .Add("analysis.char_filter.drop_unit.replacement", "")
    )
    .Analysis(ad => ad
        .Analyzers(ab => ab
            .Add("address_index", new CustomAnalyzer()
            {
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase", "synonym" }
            })
            .Add("address_search", new CustomAnalyzer()
            {
                CharFilter = new List<string> { "drop_unit" },
                Tokenizer = "whitespace",
                Filter = new List<string>() { "lowercase" }
            })
        )
        .TokenFilters(tfb => tfb
            .Add("synonym", new SynonymTokenFilter()
            {
                Expand = true,
                SynonymsPath = "analysis/synonym.txt"
            })
        )
    )
);
EsClient.CreateIndex("universal_de", c => c
    .NumberOfReplicas(1)
    .NumberOfShards(5)
    .Settings(s => s // 仅作为示例
        .Add("merge.policy.merge_factor", "10")
        .Add("search.slowlog.threshold.fetch.warn", "1s")
        .Add("analysis.char_filter.drop_chars.type", "pattern_replace")
        .Add("analysis.char_filter.drop_chars.pattern", @"[^0-9]")
        .Add("analysis.char_filter.drop_chars.replacement", "")
        .Add("analysis.char_filter.drop_specChars.type", "pattern_replace")
        .Add("analysis.char_filter.drop_specChars.pattern", @"[^0-9a-zA-Z]")
        .Add("analysis.char_filter.drop_specChars.replacement", "")
    )
    .Analysis(descriptor => descriptor
        .Analyzers(bases => bases
            .Add("folded_word", new CustomAnalyzer()
            {
                Filter = new List<string> { "lowercase", "asciifolding", "trim" },
                Tokenizer = "standard"
// Full example: index with pattern_replace char filters defined via raw
// settings, custom analyzers referencing them, and a multi-field mapping for
// Business. Fix vs. the posted snippet: the statement was truncated — the
// closing parentheses for .Properties(...), .AddMapping<Business>(...) and
// CreateIndex(...) are restored so the statement is syntactically complete.
EsClient.CreateIndex("universal_de", c => c
    .NumberOfReplicas(1)
    .NumberOfShards(5)
    .Settings(s => s //just as an example
        .Add("merge.policy.merge_factor", "10")
        .Add("search.slowlog.threshold.fetch.warn", "1s")
        // Keep digits only (used by the phone-number analyzer below).
        .Add("analysis.char_filter.drop_chars.type", "pattern_replace")
        .Add("analysis.char_filter.drop_chars.pattern", @"[^0-9]")
        .Add("analysis.char_filter.drop_chars.replacement", "")
        // Keep alphanumerics only.
        .Add("analysis.char_filter.drop_specChars.type", "pattern_replace")
        .Add("analysis.char_filter.drop_specChars.pattern", @"[^0-9a-zA-Z]")
        .Add("analysis.char_filter.drop_specChars.replacement", "")
    )
    .Analysis(descriptor => descriptor
        .Analyzers(bases => bases
            // Ascii-folded, trimmed, lowercased words.
            .Add("folded_word", new CustomAnalyzer()
            {
                Filter = new List<string> { "lowercase", "asciifolding", "trim" },
                Tokenizer = "standard"
            })
            // Digits-only tokens (phone numbers).
            .Add("trimmed_number", new CustomAnalyzer()
            {
                CharFilter = new List<string> { "drop_chars" },
                Tokenizer = "standard",
                Filter = new List<string>() { "lowercase" }
            })
            // Alphanumeric-only tokens.
            .Add("trimmed_specChars", new CustomAnalyzer()
            {
                CharFilter = new List<string> { "drop_specChars" },
                Tokenizer = "standard",
                Filter = new List<string>() { "lowercase" }
            })
        )
    )
    .AddMapping<Business>(m => m
        //.MapFromAttributes()
        .Properties(props => props
            .MultiField(mf => mf
                .Name(t => t.DirectoryName)
                .Fields(fs => fs
                    .String(s => s.Name(t => t.DirectoryName).Analyzer("standard"))
                    .String(s => s.Name(t => t.DirectoryName.Suffix("folded")).Analyzer("folded_word"))
                )
            )
            .MultiField(mf => mf
                .Name(t => t.Phone)
                .Fields(fs => fs
                    .String(s => s.Name(t => t.Phone).Analyzer("trimmed_number"))
                )
            )
        )
    )
);
// Search Business documents: boosted query-string match on the ascii-folded
// name, fuzzy matching for small typos, and an exact digits-only phone term.
var result = _Instance.Search<Business>(search => search
    .TrackScores(true)
    .Query(query =>
    {
        QueryContainer combined = null;
        if (!string.IsNullOrWhiteSpace(input.searchTerm))
        {
            var term = input.searchTerm.ToLower();

            // Boosted match against the folded directory name.
            combined |= query.QueryString(qs => qs
                .OnFieldsWithBoost(fields => fields.Add("directoryName.folded", 5.0))
                .Query(term));

            // Tolerate minor misspellings.
            combined |= query.Fuzzy(fuzzy => fuzzy
                .OnField("directoryName.folded")
                .Value(term)
                .MaxExpansions(2));

            // Phone is indexed digits-only, so strip non-digits before matching.
            combined |= query.Term("phone", Regex.Replace(term, @"[^0-9]", ""));
        }
        return combined;
    })
    .Skip(input.skip)
    .Take(input.take)
);
// Fragment: the same analysis configuration expressed entirely with NEST's
// fluent API — possible once NEST added PatternReplaceCharFilter (May 2014),
// so the raw Settings.Add workaround above is no longer needed.
.Analysis(descriptor => descriptor
.Analyzers(bases => bases
// Ascii-folded, trimmed, lowercased words.
.Add("folded_word", new CustomAnalyzer()
{
Filter = new List<string> { "lowercase", "asciifolding", "trim" },
Tokenizer = "standard"
}
)
// Digits-only tokens via the "drop_chars" char filter defined below.
.Add("trimmed_number", new CustomAnalyzer()
{
CharFilter = new List<string> { "drop_chars" },
Tokenizer = "standard",
Filter = new List<string>() { "lowercase" }
})
// Alphanumeric-only tokens via the "drop_specChars" char filter below.
.Add("trimmed_specChars", new CustomAnalyzer()
{
CharFilter = new List<string> { "drop_specChars" },
Tokenizer = "standard",
Filter = new List<string>() { "lowercase" }
})
// Edge-n-gram analyzer for autocomplete (uses the "engram" filter below).
.Add("autocomplete", new CustomAnalyzer()
{
Tokenizer = new WhitespaceTokenizer().Type,
Filter = new List<string>() { "lowercase", "asciifolding", "trim", "engram" }
}
)
)
.TokenFilters(i => i
// Prefix n-grams from 3 to 15 characters for autocomplete matching.
.Add("engram", new EdgeNGramTokenFilter
{
MinGram = 3,
MaxGram = 15
}
)
)
.CharFilters(cf => cf
// Strongly-typed pattern_replace char filters (no raw settings needed).
.Add("drop_chars", new PatternReplaceCharFilter
{
Pattern = @"[^0-9]",
Replacement = ""
}
)
.Add("drop_specChars", new PatternReplaceCharFilter
{
Pattern = @"[^0-9a-zA-Z]",
Replacement = ""
}
)
)
)