MongoDB-$unwind-如何在$group聚合中展开多个数组
(1) 我已将以下内容添加到集合中:MongoDB-$unwind-如何在$group聚合中展开多个数组,mongodb,pymongo,Mongodb,Pymongo,(1) 我已将以下内容添加到集合中: { "_id" : 1, "hitsPerOneSecond" : [ 2, 3, 5, 4, 1, 2, 3, 4, 1, 2 ], "startTime" : ISODate("2012-04-07T10:41:33.380Z"), "returnCodeHits" : { "300" : 5, "200" : 12 }, "xxxServer" : "xxx:8100", "statsSummarizedToSeconds" : 10, "pat
{ "_id" : 1, "hitsPerOneSecond" : [ 2, 3, 5, 4, 1, 2, 3, 4, 1, 2 ], "startTime" : ISODate("2012-04-07T10:41:33.380Z"), "returnCodeHits" : { "300" : 5, "200" : 12 }, "xxxServer" : "xxx:8100", "statsSummarizedToSeconds" : 10, "pathStats_xxx_api_get_version" : [ 0.2280779683225852, 0.030849283020361273, 0.9947690473370484 ], "pathStats_xxx_api_get_response" : [ 1.2163705612407407, 1.0602539963494662, 1.4853219936411421 ], "type" : "xxxType", "startTimeStr" : "07-04-2012:10AM" }
{ "_id" : 2, "hitsPerOneSecond" : [ 2, 3, 5, 4, 1, 2, 3, 4, 1, 2 ], "startTime" : ISODate("2012-04-07T10:41:43.380Z"), "returnCodeHits" : { "300" : 5, "200" : 12 }, "xxxServer" : "xxx:8100", "statsSummarizedToSeconds" : 10, "pathStats_xxx_api_get_version" : [ 0.2280779683225852, 0.030849283020361273, 0.9947690473370484 ], "pathStats_xxx_api_get_response" : [ 1.2163705612407407, 1.0602539963494662, 1.4853219936411421 ], "type" : "xxxType", "startTimeStr" : "07-04-2012:10AM" }
(2) 执行以下聚合时:
db.newStats.aggregate({$unwind: "$hitsPerOneSecond"},{$group:{_id:"$startTimeStr", totalHits: {$sum: "$hitsPerOneSecond"}, totalHitsCount: {$sum: 1}, avgHit: {$avg: "$hitsPerOneSecond"}, minHit: {$min:"$hitsPerOneSecond"}, maxHit:{$max: "$hitsPerOneSecond"}}});
(3) 结果正确无误:
{
"result" : [
{
"_id" : "07-04-2012:10AM",
"totalHits" : 54,
"totalHitsCount" : 20,
"avgHit" : 2.7,
"minHit" : 1,
"maxHit" : 5
}
],
"ok" : 1
}
(4) 但是,我需要在上面同一个聚合中对“pathStats_xxx_api_get_response”(来自集合)也执行展开,以便在同一个结果中同时输出 totalResponses、totalResponsesCount、avgResponse、minResponse 和 maxResponse。因此,我的结果应该是这样的:
{
"result" : [
{
"_id" : "07-04-2012:10AM",
"totalHits" : 54,
"totalHitsCount" : 20,
"avgHit" : 2.7,
"minHit" : 1,
"maxHit" : 5,
"totalResponses" : ??
"totalResponsesCount": ??
"avgResponse" : 2.7,
"minResponse" : 1,
"maxResponse" : 5
}
],
"ok" : 1
}
我不知道如何在同一个聚合中添加更多的 $unwind,感觉已经差一点就成功了。 最简单的解决方案可能是使用两个单独的聚合操作,并在应用程序中合并结果。 或者,您也可以通过 Map-Reduce 操作来实现: 以下 map 和 reduce 函数应该能给出您要查找的结果:
/**
 * Map step for db.newStats.mapReduce(): emits one summary object per
 * document, keyed by the document's startTimeStr.
 *
 * Reads (via `this`, the current document):
 *   - hitsPerOneSecond: array of numbers
 *   - pathStats_xxx_api_get_response: array of numbers
 *   - startTimeStr: used as the emit key
 *
 * The emitted value carries total/count/avg/min/max for both arrays, using
 * the same keys the reduce step accumulates.
 *
 * A missing or empty array is summarized as total 0, count 0, avg 0,
 * min Infinity and max -Infinity, so it never wins a min/max comparison
 * and never produces NaN.
 */
var map = function() {
    // Kept local so the map function is self-contained when handed to mapReduce.
    var summarize = function(values) {
        var list = values || [];
        var total = 0;
        var min = Infinity;
        var max = -Infinity;
        // Single pass: a real sum (Array.prototype.map would yield an array,
        // not a number) plus the extremes.
        for (var i = 0; i < list.length; i++) {
            total += list[i];
            if (list[i] < min) {
                min = list[i];
            }
            if (list[i] > max) {
                max = list[i];
            }
        }
        return {
            total: total,
            count: list.length,
            // Guard against 0 / 0 when the array is absent or empty.
            avg: list.length > 0 ? total / list.length : 0,
            min: min,
            max: max
        };
    };

    var hits = summarize(this.hitsPerOneSecond);
    var responses = summarize(this.pathStats_xxx_api_get_response);

    emit(this.startTimeStr, {
        "totalHits": hits.total,
        "totalHitsCount": hits.count,
        "avgHit": hits.avg,
        "minHit": hits.min,
        "maxHit": hits.max,
        "totalResponses": responses.total,
        "totalResponsesCount": responses.count,
        "avgResponse": responses.avg,
        "maxResponse": responses.max,
        "minResponse": responses.min
    });
};
/**
 * Reduce step for db.newStats.mapReduce(): folds the partial summaries
 * emitted for one key into a single summary with the same shape.
 *
 * @param key    the emit key (unused by the computation)
 * @param values array of summary objects carrying totalHits, totalHitsCount,
 *               minHit, maxHit, totalResponses, totalResponsesCount,
 *               minResponse and maxResponse
 * @returns      combined summary; averages are recomputed from the combined
 *               totals and counts, and min/max stay null when values is empty
 */
var reduce = function(key, values) {
    // Smaller of the two, treating a null/undefined accumulator as "unset".
    var lower = function(current, candidate) {
        if (current == null) {
            return candidate;
        }
        return candidate < current ? candidate : current;
    };
    // Larger of the two, treating a null/undefined accumulator as "unset".
    var higher = function(current, candidate) {
        if (current == null) {
            return candidate;
        }
        return candidate > current ? candidate : current;
    };

    var merged = {
        "totalHits": 0,
        "totalHitsCount": 0,
        "avgHit": 0,
        "minHit": null,
        "maxHit": null,
        "totalResponses": 0,
        "totalResponsesCount": 0,
        "avgResponse": 0,
        "maxResponse": null,
        "minResponse": null
    };

    var seen = 0;
    values.forEach(function(part) {
        seen += 1;

        merged.totalHits += part.totalHits;
        merged.totalHitsCount += part.totalHitsCount;
        merged.minHit = lower(merged.minHit, part.minHit);
        merged.maxHit = higher(merged.maxHit, part.maxHit);

        merged.totalResponses += part.totalResponses;
        merged.totalResponsesCount += part.totalResponsesCount;
        merged.minResponse = lower(merged.minResponse, part.minResponse);
        merged.maxResponse = higher(merged.maxResponse, part.maxResponse);
    });

    // Averages only change once at least one partial summary was folded in;
    // with no input they keep their initial value of 0.
    if (seen > 0) {
        merged.avgHit = merged.totalHits / merged.totalHitsCount;
        merged.avgResponse = merged.totalResponses / merged.totalResponsesCount;
    }

    return merged;
};
> db.newStats.mapReduce(map, reduce, {out:{inline:1}})
{
"results" : [
{
"_id" : "07-04-2012:10AM",
"value" : {
"totalHits" : 54,
"totalHitsCount" : 20,
"avgHit" : 2.7,
"minHit" : 1,
"maxHit" : 5,
"totalResponses" : 7.523893102462698,
"totalResponsesCount" : 6,
"avgResponse" : 1.253982183743783,
"maxResponse" : 1.4853219936411421,
"minResponse" : 1.0602539963494662
}
}
],
"timeMillis" : 0,
"counts" : {
"input" : 2,
"emit" : 2,
"reduce" : 1,
"output" : 1
},
"ok" : 1,
}
>
如何 $unwind 多个数组?您是否尝试过连续多次使用 $unwind?
请记住,聚合框架以数组作为输入(请注意,我添加了 [ 和 ])。在该数组中,您可以向管道添加任意数量的聚合操作(需要引用),任何一步的输出都将是下一步的输入
注意:
不要忘记,如果您试图在一个不存在的键或一个空数组上
$unwind
,您最终将没有任何文档!这就像乘以0
我猜……因此,对于多个(可能很多个)$unwind
,文档“凭空消失”的几率会增加:如果所涉及的任何数组为空,则整个文档将丢失,而您的任何 $group
聚合都将一无所获…… 这是一个极好的示例,非常感谢。另外,也非常感谢您给出文档链接。关于聚合——我的目标是通过仪表板和 pymongo 动态聚合统计数据,但我认为 map/reduce 和使用 $group 的聚合可能仍然相对较慢?我希望聚合 60080 条(相当于 1 周)10 秒粒度的统计数据,这些数据已由一个 Python 脚本从 Apache 日志文件整理成 10 秒统计。 很高兴能帮上忙!我更新了我的答案,并在 map 函数中加了一些注释。 不过还有一个重要的问题……您将如何通过 pymongo 执行 db.newStats.aggregate({$unwind: "$hitsPerOneSecond"} ... 等等)?因为我找不到任何相关文档。 我已在答案后面追加了 pymongo 示例。 啊哈,对了,newStats 是集合的名称。我原先写成了 "aggregate": "startTimeStr",还在纳闷 .command 怎么知道我说的是哪一个集合!谢谢。
In [1]: import pymongo
In [2]: conn = pymongo.Connection()
In [3]: db = conn.test
In [4]: result = db.command({"aggregate":"newStats", "pipeline":
[{"$unwind": "$hitsPerOneSecond"},
{"$group": {"_id":"$startTimeStr",
"totalHits": {"$sum":
"$hitsPerOneSecond"},
"totalHitsCount": {"$sum": 1},
"avgHit": {"$avg": "$hitsPerOneSecond"},
"minHit": {"$min":"$hitsPerOneSecond"},
"maxHit":{"$max": "$hitsPerOneSecond"}}}]})
In [5]: result
Out[5]:
{u'ok': 1.0,
u'result': [{u'_id': u'07-04-2012:10AM',
u'avgHit': 2.7,
u'maxHit': 5.0,
u'minHit': 1.0,
u'totalHits': 54.0,
u'totalHitsCount': 20}]}
// Aggregation pipeline (passed as an array of stages) that unwinds both
// hitsPerOneSecond and pathStats_xxx_api_get_response, then groups by
// startTimeStr.
// NOTE(review): two consecutive $unwind stages presumably emit one document
// per (hit, response) pair, so $sum, $avg and the {$sum: 1} count would be
// computed over the cross product of the two arrays — verify the totals
// against the single-$unwind result (54 / 20) before relying on them.
db.newStats.aggregate([
{$unwind: "$hitsPerOneSecond"},
{$unwind: "$pathStats_xxx_api_get_response"},
{$group:{
_id:"$startTimeStr",
totalHits: {$sum: "$hitsPerOneSecond"},
totalHitsCount: {$sum: 1},
avgHit: {$avg: "$hitsPerOneSecond"},
minHit: {$min:"$hitsPerOneSecond"},
maxHit:{$max: "$hitsPerOneSecond"},
totalResponses: {$sum: "$pathStats_xxx_api_get_response"},
// Remaining response accumulators are elided in the original snippet.
. . .
}}
]);