查找重复项时指定Elasticsearch聚合字段

查找重复项时指定Elasticsearch聚合字段
我在查找重复项时使用以下ES查询: "aggs": { "duplicates": { "terms": { "field": "phone", "min_doc_count": 2, "size": 99999, "order": …

我在查找重复项时使用以下ES查询:

"aggs": {
    "duplicates": {
        "terms": {
            "field": "phone",
            "min_doc_count": 2,
            "size": 99999,
            "order": {
                "_term": "asc"
            }
        },
        "aggs": {
            "_docs": {
                "top_hits": {
                    "size": 99999
                }
            }
        }
    }
}
它工作得很好:它返回键(在本例中是 `phone`),并在每个键下返回所有匹配的文档。主要的问题是,`_source` 中返回了文档的全部内容,而我的文档有很多字段,我只想返回我需要的字段。返回内容的示例:

        "duplicates": {
                "1": {
                    "key": "1",
                    "doc_count": 2,
                    "_docs": {
                        "hits": {
                            "total": 2,
                            "max_score": 1,
                            "hits": [
                                {
                                    "_index": "local:company_id:1:sync",
                                    "_type": "leads",
                                    "_id": "23",
                                    "_score": 1,
                                    "_source": {
                                        "id": 23,
                                        "phone": 123456,
                                        "areacode_id": 426,
                                        "areacode_state_id": 2,
                                        "firstName": "Brayan",
                                        "lastName": "Rastelli",
                                        "state": "", // .... and so on
我想指定在 `_source` 中返回哪些字段,这是否可行?

我遇到的另一个问题是,我想按特定字段(按 `id`)对聚合结果排序,但如果我把 `_term` 换成任何其他字段名,就会出现错误。


谢谢大家!

在下面的示例中,`id` 为 29 和 23 的文档具有相同的 `phone`,因此它们是重复项。搜索查询将只返回两个字段,即 `id` 和 `phone`(您可以根据自己的需求更改这些字段),并根据 `id` 对每个桶内的命中结果进行升序排序。

下面是一个包含索引数据、搜索查询和搜索结果的可运行示例:

索引数据:

{
  "id": 29,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 23,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 30,
  "phone": 1235,
  "areacode_id": 92,
  "areacode_state_id": 10,
  "firstName": "Mark",
  "lastName": "Smith",
  "state": ""
}
{
  "size": 0,
  "aggs": {
    "duplicates": {
      "terms": {
        "field": "phone",
        "min_doc_count": 2,
        "size": 99999
      },
      "aggs": {
        "_docs": {
          "top_hits": {
            "_source": {
              "includes": [
                "phone",
                "id"
              ]
            },
            "sort": [
              {
                "id": {
                  "order": "asc"
                }
              }
            ]
          }
        }
      }
    }
  }
}
"aggregations": {
    "duplicates": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 123456,
          "doc_count": 2,
          "_docs": {
            "hits": {
              "total": {
                "value": 2,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "1",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 23
                  },
                  "sort": [
                    23                       // note this
                  ]
                },
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "2",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 29
                  },
                  "sort": [
                    29                         // note this
                  ] 
                }
              ]
            }
          }
        }
      ]
    }
  }
搜索查询:

{
  "id": 29,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 23,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 30,
  "phone": 1235,
  "areacode_id": 92,
  "areacode_state_id": 10,
  "firstName": "Mark",
  "lastName": "Smith",
  "state": ""
}
{
  "size": 0,
  "aggs": {
    "duplicates": {
      "terms": {
        "field": "phone",
        "min_doc_count": 2,
        "size": 99999
      },
      "aggs": {
        "_docs": {
          "top_hits": {
            "_source": {
              "includes": [
                "phone",
                "id"
              ]
            },
            "sort": [
              {
                "id": {
                  "order": "asc"
                }
              }
            ]
          }
        }
      }
    }
  }
}
"aggregations": {
    "duplicates": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 123456,
          "doc_count": 2,
          "_docs": {
            "hits": {
              "total": {
                "value": 2,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "1",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 23
                  },
                  "sort": [
                    23                       // note this
                  ]
                },
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "2",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 29
                  },
                  "sort": [
                    29                         // note this
                  ] 
                }
              ]
            }
          }
        }
      ]
    }
  }
搜索结果:

{
  "id": 29,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 23,
  "phone": 123456,
  "areacode_id": 426,
  "areacode_state_id": 2,
  "firstName": "Brayan",
  "lastName": "Rastelli",
  "state": ""
}
{
  "id": 30,
  "phone": 1235,
  "areacode_id": 92,
  "areacode_state_id": 10,
  "firstName": "Mark",
  "lastName": "Smith",
  "state": ""
}
{
  "size": 0,
  "aggs": {
    "duplicates": {
      "terms": {
        "field": "phone",
        "min_doc_count": 2,
        "size": 99999
      },
      "aggs": {
        "_docs": {
          "top_hits": {
            "_source": {
              "includes": [
                "phone",
                "id"
              ]
            },
            "sort": [
              {
                "id": {
                  "order": "asc"
                }
              }
            ]
          }
        }
      }
    }
  }
}
"aggregations": {
    "duplicates": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 123456,
          "doc_count": 2,
          "_docs": {
            "hits": {
              "total": {
                "value": 2,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "1",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 23
                  },
                  "sort": [
                    23                       // note this
                  ]
                },
                {
                  "_index": "66896259",
                  "_type": "_doc",
                  "_id": "2",
                  "_score": null,
                  "_source": {
                    "phone": 123456,
                    "id": 29
                  },
                  "sort": [
                    29                         // note this
                  ] 
                }
              ]
            }
          }
        }
      ]
    }
  }

太棒了,谢谢!!