Elasticsearch: grouping the results of a top_hits aggregation
I want to filter documents by when each user last visited, pick each user's most recently visited document, and then group all of those filtered documents by offer code. I use a top_hits aggregation to get each user's most recent document, but I cannot group the results of the top_hits aggregation by offercode. The documents in the index:
{
  "took": 53,
  "timed_out": false,
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "hits": {
    "total": 6,
    "max_score": 1.0,
    "hits": [{
      "_index": "db",
      "_type": "users",
      "_id": "AVOiyjHmzUObmc5euUGS",
      "_score": 1.0,
      "_source": {
        "user": "james",
        "lastvisited": "2016/01/20 02:03:11",
        "browser": "chrome",
        "offercode": "JB20"
      }
    }, {
      "_index": "db",
      "_type": "users",
      "_id": "AVOiyjIQzUObmc5euUGT",
      "_score": 1.0,
      "_source": {
        "user": "james",
        "lastvisited": "2016/01/20 03:04:15",
        "browser": "firefox",
        "offercode": "JB20,JB50"
      }
    }, {
      "_index": "db",
      "_type": "users",
      "_id": "AVOiyjIlzUObmc5euUGU",
      "_score": 1.0,
      "_source": {
        "user": "james",
        "lastvisited": "2016/01/21 00:15:21",
        "browser": "chrome",
        "offercode": "JB20,JB50,JB100"
      }
    }, {
      "_index": "db",
      "_type": "users",
      "_id": "AVOiyjJKzUObmc5euUGW",
      "_score": 1.0,
      "_source": {
        "user": "peter",
        "lastvisited": "2016/01/20 02:32:22",
        "browser": "chrome",
        "offercode": "JB20,JB50,JB100"
      }
    }, {
      "_index": "db",
      "_type": "users",
      "_id": "AVOiy4jhzUObmc5euUGX",
      "_score": 1.0,
      "_source": {
        "user": "james",
        "lastvisited": "2016/01/19 02:03:11",
        "browser": "chrome",
        "offercode": ""
      }
    }, {
      "_index": "db",
      "_type": "users",
      "_id": "AVOiyjI2zUObmc5euUGV",
      "_score": 1.0,
      "_source": {
        "user": "adams",
        "lastvisited": "2016/01/20 00:12:11",
        "browser": "chrome",
        "offercode": "JB10"
      }
    }]
  }
}
The ES query to get each user's latest document:
curl -XGET localhost:9200/account/users/_search?pretty -d'{
  "size": "0",
  "query": {
    "bool": {
      "must": {
        "range": {
          "lastvisited": {
            "gte": "2016/01/19",
            "lte": "2016/01/21"
          }
        }
      }
    }
  },
  "aggs": {
    "lastvisited_users": {
      "terms": {
        "field": "user"
      },
      "aggs": {
        "top_user_hits": {
          "top_hits": {
            "sort": [
              {
                "lastvisited": {
                  "order": "desc"
                }
              }
            ],
            "_source": {
              "include": [
                "user", "offercode", "lastvisited"
              ]
            },
            "size": 1
          }
        }
      }
    }
  }
}'
ES output:
{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 6,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "lastvisited_users" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ {
        "key" : "james",
        "doc_count" : 3,
        "top_user_hits" : {
          "hits" : {
            "total" : 3,
            "max_score" : null,
            "hits" : [ {
              "_index" : "accounts",
              "_type" : "users",
              "_id" : "AVOtexIEz1WBU8vnnZ2d",
              "_score" : null,
              "_source" : {
                "lastvisited" : "2016/01/20 03:04:15",
                "offercode" : "JB20,JB50",
                "user" : "james"
              },
              "sort" : [ 1453259055000 ]
            } ]
          }
        }
      }, {
        "key" : "adams",
        "doc_count" : 1,
        "top_user_hits" : {
          "hits" : {
            "total" : 1,
            "max_score" : null,
            "hits" : [ {
              "_index" : "accounts",
              "_type" : "users",
              "_id" : "AVOtexJMz1WBU8vnnZ2h",
              "_score" : null,
              "_source" : {
                "lastvisited" : "2016/01/20 00:12:11",
                "offercode" : "JB10",
                "user" : "adams"
              },
              "sort" : [ 1453248731000 ]
            } ]
          }
        }
      }, {
        "key" : "adamsnew",
        "doc_count" : 1,
        "top_user_hits" : {
          "hits" : {
            "total" : 1,
            "max_score" : null,
            "hits" : [ {
              "_index" : "accounts",
              "_type" : "users",
              "_id" : "AVOtexJhz1WBU8vnnZ2i",
              "_score" : null,
              "_source" : {
                "lastvisited" : "2016/01/20 00:12:11",
                "offercode" : "JB1010,aka10",
                "user" : "adamsnew"
              },
              "sort" : [ 1453248731000 ]
            } ]
          }
        }
      }, {
        "key" : "peter",
        "doc_count" : 1,
        "top_user_hits" : {
          "hits" : {
            "total" : 1,
            "max_score" : null,
            "hits" : [ {
              "_index" : "accounts",
              "_type" : "users",
              "_id" : "AVOtexIoz1WBU8vnnZ2f",
              "_score" : null,
              "_source" : {
                "lastvisited" : "2016/01/20 02:32:22",
                "offercode" : "JB20,JB50,JB100",
                "user" : "peter"
              },
              "sort" : [ 1453257142000 ]
            } ]
          }
        }
      } ]
    }
  }
}
Now I want to aggregate the results of the top_hits aggregation. I tried pipeline aggregations, but I could not work out how to group on the output of a top_hits aggregation.
Expected output:
{
  "offercode_grouped": {
    "JB20": 1,
    "JB10": 1,
    "JB20,JB50": 1,
    "JB20,JB50,JB100": 2,
    "": 1
  }
}
I hope I understood your question correctly. I think I found a somewhat hacky "solution". It is a combination of routing, a function_score query and a sampler aggregation: create a new index, index the documents, then get the aggregation.

Unless the index has only one shard, you need to specify routing when indexing the data. This is because the sampler aggregation is computed per shard, so all documents of a given user must sit on the same shard in order to get that user's top-scored document.

The sampler aggregation selects documents by score, which is why the documents' scores have to be changed. The function_score query can help here: with field_value_factor, the score is simply the lastvisited timestamp, so the more recent the visit, the higher the score.

Update: field_value_factor can run into _score accuracy issues; for more information, see the issue. That is why the decay function suggested by clintongormley in that issue is used instead. A decay function applies on both sides of the origin, meaning a document one day newer than the origin and a document one day older receive the same _score; newer documents therefore have to be filtered out (see the range filter in the query).
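For illustration, here is a minimal sketch of that first field_value_factor approach, the one the update above replaces. This is my reconstruction, not code from the original answer, and it assumes lastvisited has numeric (epoch millisecond) doc values:

curl -XPOST "http://127.0.0.1:9200/stackoverflow/_search" -d'
{
  "size": 0,
  "query": {
    "function_score": {
      "boost_mode": "replace", // discard the relevance score entirely
      "query": { "match_all": {} },
      "field_value_factor": {
        "field": "lastvisited" // score = lastvisited as a number, so newer visits score higher
      }
    }
  }
}'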
Note: I only built this query against the data visible in the example, so a bigger dataset is needed to test the query properly. But I think it should work.

From the comments:

- Check this solution: it is more limited, but fit for production.
- Please share the mapping of the index. (@Richa then added the ES mapping to the post.)
- You will not be able to group on the offercode provided, because it is an analyzed string: the value 'JB20,JB50,JB100' gets tokenized into JB20, JB50 and JB100.
- @Richa: No problem, I will change the field to not_analyzed. But I would like to know whether it is possible to get the result if I change the analysis.
- You can get each user's most recently visited document by applying an aggregation on the user field, but you will not be able to aggregate on the offercode field. I will try and get back to you.
- I think he is asking for an Elasticsearch DSL query. If you are trying to suggest an approach using SQL, you should explain your answer.
- This is exactly what I expected. It works fine for smaller data: I tried 10,000 records, but the aggregation only returned output for 13 documents.
- How many users are in those documents? If there are 13 users, then it works as expected (I may still not fully understand your use case). It should fetch one document per user (the latest one by lastvisited) and compute the aggregation over those 13 documents.
- Your understanding is correct. I have 10,000 unique users in the index type, but the aggregation returns only 13 of them.
- I tried to generate a dataset with 10,000 unique users. With one shard, the aggregation returns 100 documents. That is because the shard_size option of the sampler aggregation defaults to 100; increasing the value helps. I also tried 100 shards (with the routing mentioned in the answer); it was a lot slower, especially with a high shard_size. So I do not think it is fit for production, but it should work.
- I am facing an issue with the function_score query: it returns the same score for all matched documents. Any idea?
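The answer's "create a new index" step did not survive in this copy. Below is a plausible reconstruction, assuming Elasticsearch 2.x mapping syntax: the string fields are kept not_analyzed so that the terms aggregation treats 'JB20,JB50,JB100' as a single value (as discussed in the comments above), and lastvisited is a date whose format matches the documents. The field names mirror the sample documents; everything else here is an assumption:

curl -s -XPUT "http://127.0.0.1:9200/stackoverflow" -d'
{
  "mappings": {
    "document": {
      "properties": {
        "user":        { "type": "string", "index": "not_analyzed" },
        "offercode":   { "type": "string", "index": "not_analyzed" },
        "browser":     { "type": "string", "index": "not_analyzed" },
        "lastvisited": { "type": "date", "format": "yyyy/MM/dd HH:mm:ss" }
      }
    }
  }
}'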
Index the documents, routing each one by its user so that a user's documents all land on the same shard:

curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/1?routing=james" -d'
{
  "user": "james",
  "lastvisited": "2016/01/20 02:03:11",
  "browser": "chrome",
  "offercode": "JB20"
}'
curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/2?routing=james" -d'
{
  "user": "james",
  "lastvisited": "2016/01/20 03:04:15",
  "browser": "firefox",
  "offercode": "JB20,JB50"
}'
curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/3?routing=james" -d'
{
  "user": "james",
  "lastvisited": "2016/01/21 00:15:21",
  "browser": "chrome",
  "offercode": "JB20,JB50,JB100"
}'
curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/4?routing=peter" -d'
{
  "user": "peter",
  "lastvisited": "2016/01/20 02:32:22",
  "browser": "chrome",
  "offercode": "JB20,JB50,JB100"
}'
curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/5?routing=james" -d'
{
  "user": "james",
  "lastvisited": "2016/01/19 02:03:11",
  "browser": "chrome",
  "offercode": ""
}'
curl -s -XPUT "http://127.0.0.1:9200/stackoverflow/document/6?routing=adams" -d'
{
  "user": "adams",
  "lastvisited": "2016/01/20 00:12:11",
  "browser": "chrome",
  "offercode": "JB10"
}'
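Freshly indexed documents only become searchable after a refresh, so it is worth forcing one before querying. This call is standard Elasticsearch, not part of the original answer:

curl -s -XPOST "http://127.0.0.1:9200/stackoverflow/_refresh"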
Get the aggregation:

curl -XPOST "http://127.0.0.1:9200/stackoverflow/_search" -d'
{
  "query": {
    "function_score": {
      "boost_mode": "replace", // we need to replace document score with the result of the functions
      "query": {
        "bool": {
          "filter": [
            {
              "range": { // get documents within the date range
                "lastvisited": {
                  "gte": "2016/01/19 00:00:00",
                  "lte": "2016/01/21 23:59:59"
                }
              }
            }
          ]
        }
      },
      "functions": [
        {
          "linear": {
            "lastvisited": {
              "origin": "2016/01/21 23:59:59", // same as lastvisited lte filter
              "scale": "2d" // set the scale - please, see elasticsearch docs for more info https://www.elastic.co/guide/en/elasticsearch/reference/2.3/query-dsl-function-score-query.html#function-decay
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "user": {
      "sampler": { // get top scored document per user
        "field": "user",
        "max_docs_per_value": 1
      },
      "aggs": {
        "offers": { // aggregate user documents per `offercode`
          "terms": {
            "field": "offercode"
          }
        }
      }
    }
  },
  "size": 0
}'
Response:

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 6,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "user": {
      "doc_count": 3,
      "offers": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "JB20,JB50,JB100",
            "doc_count": 2
          },
          {
            "key": "JB10",
            "doc_count": 1
          }
        ]
      }
    }
  }
}
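As the comments note, the sampler aggregation samples at most shard_size documents per shard (default 100), so with thousands of users most of them never reach the offers sub-aggregation. A sketch of the same aggregation block with the cap raised; the value 20000 is an arbitrary illustration and trades memory for completeness:

"aggs": {
  "user": {
    "sampler": {
      "field": "user",
      "max_docs_per_value": 1,
      "shard_size": 20000 // per-shard sample cap, default 100
    },
    "aggs": {
      "offers": {
        "terms": { "field": "offercode" }
      }
    }
  }
}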