查找重复项时指定Elasticsearch聚合字段
我在查找重复项时使用以下ES查询:
"aggs": {
"duplicates": {
"terms": {
"field": "phone",
"min_doc_count": 2,
"size": 99999,
"order": {
"_term": "asc"
}
},
"aggs": {
"_docs": {
"top_hits": {
"size": 99999
}
}
}
}
}
它工作得很好:它返回键(在本例中是 phone 字段的值),并在每个键下返回所有匹配的文档。主要的问题是,_source 中带回了文档的全部内容,而我的文档有很多字段,我只想指定返回我需要的字段。返回内容的示例:
"duplicates": {
"1": {
"key": "1",
"doc_count": 2,
"_docs": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "local:company_id:1:sync",
"_type": "leads",
"_id": "23",
"_score": 1,
"_source": {
"id": 23,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": "", // .... and so on
我想指定在 _source 中返回的字段,这可能吗?
我遇到的另一个问题是,我想按特定字段(按 id)对聚合结果排序,但如果我输入任何字段名而不是 _term,则会出现错误。
谢谢大家!
下面添加一个包含索引数据、搜索查询和搜索结果的可运行示例。在该示例中,id 为 29 和 23 的文档具有相同的 phone,因此它们是重复项。搜索查询通过 top_hits 的 _source.includes 只返回两个字段,即 id 和 phone(您可以根据自己的需要更改这些字段),并通过 sort 按 id 对 top_hits 的结果升序排序。
索引数据(下方前三个文档;其后依次为搜索查询和搜索结果):
{
"id": 29,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 23,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 30,
"phone": 1235,
"areacode_id": 92,
"areacode_state_id": 10,
"firstName": "Mark",
"lastName": "Smith",
"state": ""
}
{
"size": 0,
"aggs": {
"duplicates": {
"terms": {
"field": "phone",
"min_doc_count": 2,
"size": 99999
},
"aggs": {
"_docs": {
"top_hits": {
"_source": {
"includes": [
"phone",
"id"
]
},
"sort": [
{
"id": {
"order": "asc"
}
}
]
}
}
}
}
}
}
"aggregations": {
"duplicates": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123456,
"doc_count": 2,
"_docs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "66896259",
"_type": "_doc",
"_id": "1",
"_score": null,
"_source": {
"phone": 123456,
"id": 23
},
"sort": [
23 // note this
]
},
{
"_index": "66896259",
"_type": "_doc",
"_id": "2",
"_score": null,
"_source": {
"phone": 123456,
"id": 29
},
"sort": [
29 // note this
]
}
]
}
}
}
]
}
}
搜索查询(下方以 "size": 0 开头的代码段;其前为索引数据,其后为搜索结果):
{
"id": 29,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 23,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 30,
"phone": 1235,
"areacode_id": 92,
"areacode_state_id": 10,
"firstName": "Mark",
"lastName": "Smith",
"state": ""
}
{
"size": 0,
"aggs": {
"duplicates": {
"terms": {
"field": "phone",
"min_doc_count": 2,
"size": 99999
},
"aggs": {
"_docs": {
"top_hits": {
"_source": {
"includes": [
"phone",
"id"
]
},
"sort": [
{
"id": {
"order": "asc"
}
}
]
}
}
}
}
}
}
"aggregations": {
"duplicates": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123456,
"doc_count": 2,
"_docs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "66896259",
"_type": "_doc",
"_id": "1",
"_score": null,
"_source": {
"phone": 123456,
"id": 23
},
"sort": [
23 // note this
]
},
{
"_index": "66896259",
"_type": "_doc",
"_id": "2",
"_score": null,
"_source": {
"phone": 123456,
"id": 29
},
"sort": [
29 // note this
]
}
]
}
}
}
]
}
}
搜索结果(下方以 "aggregations" 开头的代码段;其前为索引数据和搜索查询):
{
"id": 29,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 23,
"phone": 123456,
"areacode_id": 426,
"areacode_state_id": 2,
"firstName": "Brayan",
"lastName": "Rastelli",
"state": ""
}
{
"id": 30,
"phone": 1235,
"areacode_id": 92,
"areacode_state_id": 10,
"firstName": "Mark",
"lastName": "Smith",
"state": ""
}
{
"size": 0,
"aggs": {
"duplicates": {
"terms": {
"field": "phone",
"min_doc_count": 2,
"size": 99999
},
"aggs": {
"_docs": {
"top_hits": {
"_source": {
"includes": [
"phone",
"id"
]
},
"sort": [
{
"id": {
"order": "asc"
}
}
]
}
}
}
}
}
}
"aggregations": {
"duplicates": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123456,
"doc_count": 2,
"_docs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "66896259",
"_type": "_doc",
"_id": "1",
"_score": null,
"_source": {
"phone": 123456,
"id": 23
},
"sort": [
23 // note this
]
},
{
"_index": "66896259",
"_type": "_doc",
"_id": "2",
"_score": null,
"_source": {
"phone": 123456,
"id": 29
},
"sort": [
29 // note this
]
}
]
}
}
}
]
}
}
太棒了,谢谢!!