Filter ElasticSearch-过滤器嵌套聚合
我在对筛选后的结果进行聚合时遇到了问题。我觉得自己的方向是对的,但总感觉一直在原地打转。(标签:filter、elasticsearch、nested、aggregation)下面是我的映射:
PUT /my_index
{
"mappings": {
"reporting": {
"properties": {
"events": {
"type": "nested",
"properties": {
"name": { "type": "string", "index" : "not_analyzed" },
"date": { "type": "date" }
}
}
}
}
}
}
因此,我的文档看起来像:
{
"events": [
{ "name": "INSTALL", "date": "2014-11-01" },
{ "name": "UNINSTALL", "date": "2014-11-03" },
{ "name": "INSTALL", "date": "2014-11-04" },
...
]
}
... omitted 4 documents that match filter criteria ...
"aggregations": {
"filtered_result": {
"doc_count": 4, <---- this is ok, I really have 4 docs that match criteria
"result": {
"doc_count": 12, <---- those 4 documents really have 12 events (together)
"NAME": {
"buckets": [
{
"key": 1414800000000,
"key_as_string": "2014-11-01",
"doc_count": 2
},
{
"key": 1414886400000,
"key_as_string": "2014-11-02",
"doc_count": 2
},
{
"key": 1414972800000,
"key_as_string": "2014-11-03",
"doc_count": 6
},
{
"key": 1415145600000,
"key_as_string": "2014-11-05",
"doc_count": 2
}
]
}
}
}
}
现在,当我索引一些数据时,例如:
PUT /my_index/reporting/1
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
},
{
"name": "UNINSTALL",
"date": "2014-11-05"
}
]
}
PUT /my_index/reporting/2
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
},
{
"name": "UNINSTALL",
"date": "2014-11-03"
}
]
}
PUT /my_index/reporting/3
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
},
{
"name": "UNINSTALL",
"date": "2014-11-02"
}
]
}
PUT /my_index/reporting/4
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
},
{
"name": "UNINSTALL",
"date": "2014-11-02"
},
{
"name": "INSTALL",
"date": "2014-11-03"
}
]
}
PUT /my_index/reporting/5
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
},
{
"name": "UNINSTALL",
"date": "2014-11-03"
},
{
"name": "INSTALL",
"date": "2014-11-03"
}
]
}
PUT /my_index/reporting/6
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-03"
},
{
"name": "UNINSTALL",
"date": "2014-11-03"
},
{
"name": "INSTALL",
"date": "2014-11-05"
}
]
}
PUT /my_index/reporting/7
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-02"
},
{
"name": "UNINSTALL",
"date": "2014-11-03"
},
{
"name": "INSTALL",
"date": "2014-11-05"
}
]
}
PUT /my_index/reporting/8
{
"events": [
{
"name": "INSTALL",
"date": "2014-11-01"
}
]
}
我想统计在 2014-11-02 当天或之后有安装事件、且此后没有卸载的用户数量(也就是说,卸载事件要么发生在 2014-11-02 之前,要么根本不存在),并把他们按日期直方图分组(得到“日期”->“计数”形式的桶)。
我已经针对这些嵌套数据写出了过滤器,可以得到过滤后的结果;但一到直方图聚合这一步,我就卡住了。
下面就是我卡住的地方:
GET /my_index/reporting/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "events",
"filter": {
"bool": {
"must": [
{
"term": {
"name": "INSTALL"
}
},
{
"range": {
"date": {
"gte": "2014-11-02"
}
}
}
]
}
}
}
},
{
"nested": {
"path": "events",
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"term": {
"name": "UNINSTALL"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"name": "UNINSTALL"
}
},
{
"range": {
"date": {
"lt": "2014-11-02"
}
}
}
]
}
}
]
}
}
}
}
]
}
}
}
},
"aggregations": {
"filtered_result": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "events",
"filter": {
"bool": {
"must": [
{
"term": {
"name": "INSTALL"
}
},
{
"range": {
"date": {
"gte": "2014-11-02"
}
}
}
]
}
}
}
},
{
"nested": {
"path": "events",
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"term": {
"name": "UNINSTALL"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"name": "UNINSTALL"
}
},
{
"range": {
"date": {
"lt": "2014-11-02"
}
}
}
]
}
}
]
}
}
}
}
]
}
},
"aggs": {
"result": {
"nested": {
"path": "events"
},
"aggs": {
"NAME": {
"terms": {
"field": "events.date",
"format": "yyyy-MM-dd",
"order": {
"_term": "asc"
}
}
}
}
}
}
}
}
}
我的结果如下:
{
"events": [
{ "name": "INSTALL", "date": "2014-11-01" },
{ "name": "UNINSTALL", "date": "2014-11-03" },
{ "name": "INSTALL", "date": "2014-11-04" },
...
]
}
... omitted 4 documents that match filter criteria ...
"aggregations": {
"filtered_result": {
"doc_count": 4, <---- this is ok, I really have 4 docs that match criteria
"result": {
"doc_count": 12, <---- those 4 documents really have 12 events (together)
"NAME": {
"buckets": [
{
"key": 1414800000000,
"key_as_string": "2014-11-01",
"doc_count": 2
},
{
"key": 1414886400000,
"key_as_string": "2014-11-02",
"doc_count": 2
},
{
"key": 1414972800000,
"key_as_string": "2014-11-03",
"doc_count": 6
},
{
"key": 1415145600000,
"key_as_string": "2014-11-05",
"doc_count": 2
}
]
}
}
}
}
基本上,符合条件的 4 个文档是按该条件成立的日期分布的:“2014-11-03”上有 2 个文档,“2014-11-05”上有 2 个文档(即 4 个在 2014-11-02 之后有“安装”事件、且此后没有卸载事件的文档,它们目前仍处于已安装状态)。
这是一个部分答案。有一个主要问题:按照你的要求,你给出的数据中实际上没有符合条件的文档,所以我添加了几个:
curl -XPUT 'localhost:9200/my_index/reporting/9' -d '{
"events": [
{
"name": "INSTALL",
"date": "2014-11-03"
}
]
}'
curl -XPUT 'localhost:9200/my_index/reporting/10' -d '{
"events": [
{
"name": "INSTALL",
"date": "2014-11-03"
},
{
"name": "UNINSTALL",
"date": "2014-11-01"
}
]
}'
为了能够应用这一逻辑,我修改了映射,使事件也被包含到根(父)文档中(include_in_root),这样就可以搜索“没有任何卸载事件”的文档。因为在嵌套搜索中每次只会查看单个事件,所以无法进行任何“整个 reporting 文档范围”的搜索。
curl -XPUT 'localhost:9200/my_index' -d '{
"mappings": {
"reporting": {
"properties": {
"events": {
"type": "nested", "include_in_root": true,
"properties": {
"name": { "type": "string", "index" : "not_analyzed" },
"date": { "type": "date" }
}
}
}
}
}
}'
现在来看查询本身。似乎在使用 nested 过滤器时,不能直接写“filter”,而必须先写成“query > filtered > filter”的形式。
一般来说,编写较长的 Elasticsearch 查询有一个技巧:记住除了“must”和“must_not”之外,还有“and”和“or”操作符,然后先把逻辑像代码一样写出来。就你的情况而言:
has_one(event.name == 'INSTALL' && event.date >= '2014-11-02')
&& has_none(event.name == 'UNINSTALL')
&& has_none(event.name == 'UNINSTALL' && event.date >= '2014-11-02')
或:
除了最后一个 has_only/has_none 条件之外,其余条件我都能实现。对于这一条件,你可以尝试使用子文档(parent/child 关系)。那样至少可以在 bool 的 must_not 下使用 has_child 过滤器。
当前查询:
GET /my_index/reporting/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": {
"filters": [
{
"or": {
"filters": [
{
"bool": {
"must_not": [
{
"term": {
"events.name": "UNINSTALL"
}
}
]
}
},
{
"nested": {
"path": "events",
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"name": "UNINSTALL"
}
},
{
"range": {
"date": {
"lt": "2014-11-02"
}
}
}
]
}
}
}
}
}
}
]
}
},
{
"nested": {
"path": "events",
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"name": "INSTALL"
}
},
{
"range": {
"date": {
"gte": "2014-11-02"
}
}
}
]
}
}
}
}
}
}
]
}
}
}
},
"aggregations": {
"filtered_result": {
"filter": {
"and": {
"filters": [
{
"or": {
"filters": [
{
"bool": {
"must_not": [
{
"term": {
"events.name": "UNINSTALL"
}
}
]
}
},
{
"nested": {
"path": "events",
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"name": "UNINSTALL"
}
},
{
"range": {
"date": {
"lt": "2014-11-02"
}
}
}
]
}
}
}
}
}
}
]
}
},
{
"nested": {
"path": "events",
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"name": "INSTALL"
}
},
{
"range": {
"date": {
"gte": "2014-11-02"
}
}
}
]
}
}
}
}
}
}
]
}
},
"aggs": {
"result": {
"nested": {
"path": "events"
},
"aggs": {
"NAME": {
"terms": {
"field": "date",
"format": "yyyy-MM-dd",
"order": {
"_term": "asc"
}
}
}
}
}
}
}
}
}