Javascript MongoDB单值Reduce键
我想计算客户的订单数量,从而计算去年、上月和上周的订单数量。 我写了一个MapReduce程序:Javascript MongoDB单值Reduce键,javascript,mongodb,mapreduce,Javascript,Mongodb,Mapreduce,我想计算客户的订单数量,从而计算去年、上月和上周的订单数量。 我写了一个MapReduce程序: var mapOrders = function() { var v_order = { order_date : this.dt_order ... }; emit(this.clientid, v_order); }; var reduceOrders = function(p_clientid, p_orders) { /
/**
 * Map function for the orders map-reduce job.
 *
 * Runs with `this` bound to one order document and emits a value holding the
 * order date, keyed by the document's client id.
 */
var mapOrders = function () {
  var v_order = {
    order_date: this.dt_order
    // ... (further fields were elided in the original snippet)
  };
  emit(this.clientid, v_order);
};
/**
 * Reduce function for the orders map-reduce job.
 *
 * Builds the counter document for one client from the values emitted for it.
 * The counting logic itself is still a placeholder, so all counters are
 * returned as zero.
 *
 * NOTE(review): MongoDB does not call reduce for a key with a single value and
 * may call it again on its own output, so the returned value should have the
 * same shape as the emitted values. Here map emits { order_date } while this
 * returns { orders_count } — confirm and align the two shapes.
 *
 * @param {*} p_clientid - Key emitted by the map function.
 * @param {Array} p_orders - Values emitted for that key.
 * @returns {Object} Counter document of the form { orders_count: { total: {...} } }.
 */
var reduceOrders = function (p_clientid, p_orders) {
  // Initialization of the output format of the counters.
  // The key names start with a digit, so they must be quoted to be valid.
  var r_result = {
    orders_count: {
      total: {
        "1year": 0,
        "1month": 0,
        "7day": 0
      }
      // ... (further counter groups were elided in the original snippet)
    }
  };
  for (var c_order = 0; c_order < p_orders.length; c_order++) {
    // Increment counters
  }
  return r_result;
};
// Run the map-reduce job over the orders collection with mapOrders and
// reduceOrders, merging the results into the tmp_orders_indicators collection.
db.orders.mapReduce(
mapOrders,
reduceOrders,
{
out: { merge: "tmp_orders_indicators" }
}
)
只有一个订单的客户端不执行reduce函数。
我在MongoDB documentation中发现了这一点,可以解释这种行为:
MongoDB不会为只有单一值的键调用reduce函数。
我如何才能在输出集合中只有一种类型的记录看起来像这样?强制所有记录通过reduce函数
{
"_id" : 80306,
"value" : {
"orders_count" : {
"total" : {
"count_1year" : 18,
"count_1month" : 6,
"count_7day" : 1
}
...
}
}
您可以通过聚合无缝地实现这一点。考虑下面的管道:
// Cut-off dates for the three reporting windows, relative to "now".
var dateSevenDaysAgo = new Date();
dateSevenDaysAgo.setDate(dateSevenDaysAgo.getDate() - 7);
var dateMonthAgo = new Date();
dateMonthAgo.setMonth(dateMonthAgo.getMonth() - 1);
var dateYearAgo = new Date();
dateYearAgo.setFullYear(dateYearAgo.getFullYear() - 1);

/**
 * Builds a $group accumulator that counts the orders whose dt_order is on or
 * after the given cut-off date.
 *
 * @param {Date} cutoff - Inclusive lower bound for dt_order.
 * @returns {Object} A { $sum: { $cond: [...] } } accumulator expression.
 */
var countOrdersSince = function (cutoff) {
  return {
    "$sum": {
      "$cond": [
        { "$gte": [ "$dt_order", cutoff ] },
        1, 0
      ]
    }
  };
};

var pipeline = [
  // $match takes query syntax: the field name is written WITHOUT a leading "$"
  // (a "$"-prefixed key would be parsed as an unknown operator).
  // Only orders from the last year can contribute to any of the counters.
  { "$match": { "dt_order": { "$gte": dateYearAgo } } },
  {
    "$group": {
      // Group by the client id field that the map function in this file reads
      // from the same collection (this.clientid).
      "_id": "$clientid",
      "count_1year": countOrdersSince(dateYearAgo),
      "count_1month": countOrdersSince(dateMonthAgo),
      "count_7day": countOrdersSince(dateSevenDaysAgo)
    }
  },
  // Write the per-group counters to the tmp_indicators collection.
  { "$out": "tmp_indicators" }
];
// Run the pipeline against the orders collection; its last stage ($out)
// writes the grouped counters to the tmp_indicators collection.
db.orders.aggregate(pipeline);
// Inspect the generated documents.
db.tmp_indicators.find();
找到了使用finalize实用程序的解决方案
/**
 * Map function for the orders map-reduce job.
 *
 * Runs with `this` bound to one order document and emits a value holding the
 * order date, keyed by the document's client id.
 */
var mapOrders = function () {
  var v_order = {
    order_date: this.dt_order
    // ... (further fields were elided in the original snippet)
  };
  emit(this.clientid, v_order);
};
/**
 * Reduce function for the orders map-reduce job.
 *
 * Builds the counter document for one client from the values emitted for it.
 * The counting logic itself is still a placeholder, so all counters are
 * returned as zero.
 *
 * NOTE(review): MongoDB may call reduce again on its own output, so the
 * returned value should have the same shape as the emitted values. Here map
 * emits { order_date } while this returns { orders_count } — confirm.
 *
 * @param {*} p_clientid - Key emitted by the map function.
 * @param {Array} p_orders - Values emitted for that key.
 * @returns {Object} Counter document of the form { orders_count: { total: {...} } }.
 */
var reduceOrders = function (p_clientid, p_orders) {
  // Initialization of the output format of the counters.
  // The key names start with a digit, so they must be quoted to be valid.
  var r_result = {
    orders_count: {
      total: {
        "1year": 0,
        "1month": 0,
        "7day": 0
      }
      // ... (further counter groups were elided in the original snippet)
    }
  };
  for (var c_order = 0; c_order < p_orders.length; c_order++) {
    // Increment counters
  }
  return r_result;
};
/**
 * Finalize function for the orders map-reduce job.
 *
 * Normalizes the value stored for each key. A value without an `orders_count`
 * property is treated as a raw emitted order (a key that never went through
 * reduce) and is replaced by a freshly initialized counter document. Any
 * other value is returned unchanged.
 *
 * @param {*} p_clientid - Key of the record being finalized.
 * @param {Object} p_ReducedDrders - Reduced value, or the single emitted value.
 * @returns {Object} Counter document of the form { orders_count: { total: {...} } }.
 */
var finalizeOrders = function (p_clientid, p_ReducedDrders) {
  var r_result;
  if (typeof p_ReducedDrders.orders_count === 'undefined') {
    // Initialization of the output format of the counters.
    // The key names start with a digit, so they must be quoted to be valid.
    r_result = {
      orders_count: {
        total: {
          "1year": 0,
          "1month": 0,
          "7day": 0
        }
        // ... (further counter groups were elided in the original snippet)
      }
    };
    // do the same stuff as the for loop in the reducer
  } else {
    r_result = p_ReducedDrders;
  }
  return r_result;
};
// Run the map-reduce job over the orders collection, passing finalizeOrders
// as the finalize function, and merge the results into tmp_orders_indicators.
db.orders.mapReduce(
mapOrders,
reduceOrders,
{
out: { merge: "tmp_orders_indicators" },
finalize : finalizeOrders
}
)
var-mapOrders=function(){
变量v_顺序={
订单日期:此.dt\u订单
...
};
emit(this.clientid,v_顺序);
};
var reduceOrders=函数(p_clientid,p_orders){
//计数器输出格式的初始化
var r_result={orders_count:{
总数:{
1岁:0,
1个月:0,
7天:0
}
...
}}
对于(var c_order=0;c_order
谢谢您的回答。我第一次尝试使用Aggregation框架,但我必须处理大约50000个订单。由于$out没有合并选项,文档的最大大小为16MB,因此我没有找到比MapReduce更好的解决方案。为了允许处理大型数据集,使用allowDiskUse
选项启用聚合管道阶段将数据写入临时文件。尝试在管道开始处添加额外的$match
过滤器,通过减少通过的数据量来优化它(聚合您只需要的数据,即去年到现在的订单)。不幸的是,我目前正在处理50000条记录,这些记录只代表2个月的数据(30000000条记录/年),因此在这种情况下,$match
将不是我的朋友。我尝试使用db.runCommand({aggregate: "orders", pipeline: pipeline, allowDiskUse: true})
但我遇到了16mb限制错误
/**
 * Map function for the orders map-reduce job.
 *
 * Runs with `this` bound to one order document and emits a value holding the
 * order date, keyed by the document's client id.
 */
var mapOrders = function () {
  var v_order = {
    order_date: this.dt_order
    // ... (further fields were elided in the original snippet)
  };
  emit(this.clientid, v_order);
};
/**
 * Reduce function for the orders map-reduce job.
 *
 * Builds the counter document for one client from the values emitted for it.
 * The counting logic itself is still a placeholder, so all counters are
 * returned as zero.
 *
 * NOTE(review): MongoDB may call reduce again on its own output, so the
 * returned value should have the same shape as the emitted values. Here map
 * emits { order_date } while this returns { orders_count } — confirm.
 *
 * @param {*} p_clientid - Key emitted by the map function.
 * @param {Array} p_orders - Values emitted for that key.
 * @returns {Object} Counter document of the form { orders_count: { total: {...} } }.
 */
var reduceOrders = function (p_clientid, p_orders) {
  // Initialization of the output format of the counters.
  // The key names start with a digit, so they must be quoted to be valid.
  var r_result = {
    orders_count: {
      total: {
        "1year": 0,
        "1month": 0,
        "7day": 0
      }
      // ... (further counter groups were elided in the original snippet)
    }
  };
  for (var c_order = 0; c_order < p_orders.length; c_order++) {
    // Increment counters
  }
  return r_result;
};
/**
 * Finalize function for the orders map-reduce job.
 *
 * Normalizes the value stored for each key. A value without an `orders_count`
 * property is treated as a raw emitted order (a key that never went through
 * reduce) and is replaced by a freshly initialized counter document. Any
 * other value is returned unchanged.
 *
 * @param {*} p_clientid - Key of the record being finalized.
 * @param {Object} p_ReducedDrders - Reduced value, or the single emitted value.
 * @returns {Object} Counter document of the form { orders_count: { total: {...} } }.
 */
var finalizeOrders = function (p_clientid, p_ReducedDrders) {
  var r_result;
  if (typeof p_ReducedDrders.orders_count === 'undefined') {
    // Initialization of the output format of the counters.
    // The key names start with a digit, so they must be quoted to be valid.
    r_result = {
      orders_count: {
        total: {
          "1year": 0,
          "1month": 0,
          "7day": 0
        }
        // ... (further counter groups were elided in the original snippet)
      }
    };
    // do the same stuff as the for loop in the reducer
  } else {
    r_result = p_ReducedDrders;
  }
  return r_result;
};
// Run the map-reduce job over the orders collection, passing finalizeOrders
// as the finalize function, and merge the results into tmp_orders_indicators.
db.orders.mapReduce(
mapOrders,
reduceOrders,
{
out: { merge: "tmp_orders_indicators" },
finalize : finalizeOrders
}
)