elasticsearch Elasticsearch只获取桶大小大于给定数字的日期直方图
我有下面的查询,用它获取过去3年的数据并按月分组统计,同时还得到了存在数据的月份数(即桶的数量)。以下是我的查询:
elasticsearch Elasticsearch只获取桶大小大于给定数字的日期直方图,
elasticsearch,
elasticsearch,我有下面的查询,其中我得到了过去3年的数据,按月计算,我还得到了数据存在的月份数(桶)。以下是我的疑问: { "size": 0, "query": { "bool": { "filter": { "terms": { "compId": [ 111, 112
{
"size": 0,
"query": {
"bool": {
"filter": {
"terms": {
"compId": [
111,
112
]
}
},
"must": {
"range": {
"dateCreated": {
"from": "2016-04-01",
"to": "2019-03-31",
"format": "yyyy-MM-dd"
}
}
}
}
},
"aggs": {
"grp_company": {
"terms": {
"field": "compId"
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": { --> I am getting the count of buckets here
"buckets_path": "data_per_month._count"
}
}
}
}
}
}
然而,现在我只想要那些桶数量大于30的日期直方图。这在Elasticsearch中可以实现吗?如果可以,应该怎么做?
上面的查询给出了以下结果:
{
"took": 68,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 454566,
"max_score": 0,
"hits": []
},
"aggregations": {
"grp_company": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 111,
"doc_count": 609014,
"data_per_month": {
"buckets": [
{
"key_as_string": "2017-07-01T00:00:00.000Z",
"key": 1498867200000,
"doc_count": 638
},
{
"key_as_string": "2017-08-01T00:00:00.000Z",
"key": 1501545600000,
"doc_count": 512
},
{
"key_as_string": "2017-09-01T00:00:00.000Z",
"key": 1504224000000,
"doc_count": 491
},
{
"key_as_string": "2017-10-01T00:00:00.000Z",
"key": 1506816000000,
"doc_count": 548
},
{
"key_as_string": "2017-11-01T00:00:00.000Z",
"key": 1509494400000,
"doc_count": 504
},
{
"key_as_string": "2017-12-01T00:00:00.000Z",
"key": 1512086400000,
"doc_count": 415
},
{
"key_as_string": "2018-01-01T00:00:00.000Z",
"key": 1514764800000,
"doc_count": 759
},
{
"key_as_string": "2018-02-01T00:00:00.000Z",
"key": 1517443200000,
"doc_count": 98564
},
{
"key_as_string": "2018-03-01T00:00:00.000Z",
"key": 1519862400000,
"doc_count": 29185
},
{
"key_as_string": "2018-04-01T00:00:00.000Z",
"key": 1522540800000,
"doc_count": 38522
},
{
"key_as_string": "2018-05-01T00:00:00.000Z",
"key": 1525132800000,
"doc_count": 22821
},
{
"key_as_string": "2018-06-01T00:00:00.000Z",
"key": 1527811200000,
"doc_count": 31076
},
{
"key_as_string": "2018-07-01T00:00:00.000Z",
"key": 1530403200000,
"doc_count": 67150
},
{
"key_as_string": "2018-08-01T00:00:00.000Z",
"key": 1533081600000,
"doc_count": 13464
},
{
"key_as_string": "2018-09-01T00:00:00.000Z",
"key": 1535760000000,
"doc_count": 59498
},
{
"key_as_string": "2018-10-01T00:00:00.000Z",
"key": 1538352000000,
"doc_count": 27222
},
{
"key_as_string": "2018-11-01T00:00:00.000Z",
"key": 1541030400000,
"doc_count": 46009
},
{
"key_as_string": "2018-12-01T00:00:00.000Z",
"key": 1543622400000,
"doc_count": 55696
},
{
"key_as_string": "2019-01-01T00:00:00.000Z",
"key": 1546300800000,
"doc_count": 45538
},
{
"key_as_string": "2019-02-01T00:00:00.000Z",
"key": 1548979200000,
"doc_count": 49606
},
{
"key_as_string": "2019-03-01T00:00:00.000Z",
"key": 1551398400000,
"doc_count": 20796
}
]
},
"count_buckets": {
"count": 21,
"min": 415,
"max": 98564,
"avg": 29000.666666666668,
"sum": 609014
}
},
{
"key": 112,
"doc_count": 98564,
"data_per_month": {
"buckets": [
{
"key_as_string": "2016-09-01T00:00:00.000Z",
"key": 1472688000000,
"doc_count": 3123
},
{
"key_as_string": "2016-10-01T00:00:00.000Z",
"key": 1475280000000,
"doc_count": 3156
},
{
"key_as_string": "2016-11-01T00:00:00.000Z",
"key": 1477958400000,
"doc_count": 1489
},
{
"key_as_string": "2016-12-01T00:00:00.000Z",
"key": 1480550400000,
"doc_count": 1948
},
{
"key_as_string": "2017-01-01T00:00:00.000Z",
"key": 1483228800000,
"doc_count": 3996
},
{
"key_as_string": "2017-02-01T00:00:00.000Z",
"key": 1485907200000,
"doc_count": 2766
},
{
"key_as_string": "2017-03-01T00:00:00.000Z",
"key": 1488326400000,
"doc_count": 3869
},
{
"key_as_string": "2017-04-01T00:00:00.000Z",
"key": 1491004800000,
"doc_count": 6251
},
{
"key_as_string": "2017-05-01T00:00:00.000Z",
"key": 1493596800000,
"doc_count": 2640
},
{
"key_as_string": "2017-06-01T00:00:00.000Z",
"key": 1496275200000,
"doc_count": 5541
},
{
"key_as_string": "2017-07-01T00:00:00.000Z",
"key": 1498867200000,
"doc_count": 5686
},
{
"key_as_string": "2017-08-01T00:00:00.000Z",
"key": 1501545600000,
"doc_count": 6524
},
{
"key_as_string": "2017-09-01T00:00:00.000Z",
"key": 1504224000000,
"doc_count": 8351
},
{
"key_as_string": "2017-10-01T00:00:00.000Z",
"key": 1506816000000,
"doc_count": 4848
},
{
"key_as_string": "2017-11-01T00:00:00.000Z",
"key": 1509494400000,
"doc_count": 4209
},
{
"key_as_string": "2017-12-01T00:00:00.000Z",
"key": 1512086400000,
"doc_count": 1092
},
{
"key_as_string": "2018-01-01T00:00:00.000Z",
"key": 1514764800000,
"doc_count": 2425
},
{
"key_as_string": "2018-02-01T00:00:00.000Z",
"key": 1517443200000,
"doc_count": 336
},
{
"key_as_string": "2018-03-01T00:00:00.000Z",
"key": 1519862400000,
"doc_count": 5092
},
{
"key_as_string": "2018-04-01T00:00:00.000Z",
"key": 1522540800000,
"doc_count": 1354
},
{
"key_as_string": "2018-05-01T00:00:00.000Z",
"key": 1525132800000,
"doc_count": 2022
},
{
"key_as_string": "2018-06-01T00:00:00.000Z",
"key": 1527811200000,
"doc_count": 1981
},
{
"key_as_string": "2018-07-01T00:00:00.000Z",
"key": 1530403200000,
"doc_count": 1751
},
{
"key_as_string": "2018-08-01T00:00:00.000Z",
"key": 1533081600000,
"doc_count": 1705
},
{
"key_as_string": "2018-09-01T00:00:00.000Z",
"key": 1535760000000,
"doc_count": 2617
},
{
"key_as_string": "2018-10-01T00:00:00.000Z",
"key": 1538352000000,
"doc_count": 2217
},
{
"key_as_string": "2018-11-01T00:00:00.000Z",
"key": 1541030400000,
"doc_count": 1734
},
{
"key_as_string": "2018-12-01T00:00:00.000Z",
"key": 1543622400000,
"doc_count": 1962
},
{
"key_as_string": "2019-01-01T00:00:00.000Z",
"key": 1546300800000,
"doc_count": 2601
},
{
"key_as_string": "2019-02-01T00:00:00.000Z",
"key": 1548979200000,
"doc_count": 2573
},
{
"key_as_string": "2019-03-01T00:00:00.000Z",
"key": 1551398400000,
"doc_count": 2705
}
]
},
"count_buckets": {
"count": 31,
"min": 336,
"max": 8351,
"avg": 3179.483870967742,
"sum": 98564
}
}
]
}
}
}
我只想要那些“count_buckets”中的“count”大于30的bucket。
是的,您可以根据需要在terms聚合中使用min_doc_count参数(值为30)。min_doc_count是获得这种结果的最有效方法。在这种情况下,您甚至不需要使用count_buckets,直接在同一个聚合上设置即可,请参考以下代码:
...
...
"aggs": {
"grp_company": {
"terms": {
"field": "compId",
"min_doc_count": 30
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": { --> I am getting the count of buckets here
"buckets_path": "data_per_month._count"
}
}
}
}
}
上面的聚合只会返回文档数(doc_count)不少于30的bucket。min_doc_count的默认值为1。
为了进一步了解,您可以在此处阅读Elastic官方文档:
希望这能对您有所帮助。
如果我理解正确,您要做的是根据count_buckets.count的值来过滤bucket:如果date_histogram创建的存储桶数量大于30,则应保留(该compId对应的)存储桶,否则应将其排除在外。换句话说,您希望根据条件来选择存储桶。为此,您已经添加了stats_bucket聚合来获取bucket的数量。现在,可以把这个数量用作bucket selector聚合的参数,bucket selector聚合正好能完成所需的操作。
只需将bucket_selector聚合添加到查询中,如下所示:
{
"size": 0,
"query": {
"bool": {
"filter": {
"terms": {
"compId": [
111,
112
]
}
},
"must": {
"range": {
"dateCreated": {
"from": "2016-04-01",
"to": "2019-03-31",
"format": "yyyy-MM-dd"
}
}
}
}
},
"aggs": {
"grp_company": {
"terms": {
"field": "compId"
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": {
"buckets_path": "data_per_month._count"
}
},
"bucket_filter": {
"bucket_selector": {
"buckets_path": {
"bucket_count": "count_buckets.count"
},
"script": "params.bucket_count > 30"
}
}
}
}
}
}
不,我猜你没有理解我的问题。我只想要那些“桶数量”大于30的“日期直方图”。例如,如果某个公司的date_histogram中只有“20”个bucket,则应忽略它。
您是否尝试过上述解决方案?我认为它只会返回那些包含30个或更多桶的直方图,不会返回少于30个桶的直方图。我希望这能起作用。
是的,我试过了,它不起作用。我已经更新了我的问题,希望这能澄清我的疑问。
是的!这正是我的问题所在,解决方案非常有效!非常感谢!:)
还有一件事:在Kibana中有可能实现这一点吗?
在可视化中?
是的,在可视化中。