Riak mapReduce 在记录数超过 15 条时失败的问题

Riak mapReduce 在记录数超过 15 条时失败的问题,mapreduce,erlang,riak,Mapreduce,Erlang,Riak,我一直在学习Riak,在mapReduce上遇到了一个问题。当有15条记录时,我的mapReduce函数可以正常工作,但在这之后,它会抛出堆栈跟踪错误。我是Riak和Erlang的新手,所以我不确定这是我的代码还是Riak。任何关于如何调试这个或问题是什么的建议都是值得赞赏的 代码 Map Reduce 数据 样本值 堆栈跟踪 { "phase": 1, "error": "{function_clause,[{proplists,get_value,[<<\"high_d\">>,555.621,undefined],[{file,\"proplists.erl\"},{line,225}]},…

我一直在学习Riak,在mapReduce上遇到了一个问题。当有15条记录时,我的mapReduce函数可以正常工作,但在这之后,它会抛出堆栈跟踪错误。我是Riak和Erlang的新手,所以我不确定这是我的代码还是Riak。任何关于如何调试这个或问题是什么的建议都是值得赞赏的

代码 Map Reduce 数据 样本值 堆栈跟踪
{
    "phase": 1,
    "error": "{function_clause,[{proplists,get_value,[<<\"high_d\">>,555.621,undefined],[{file,\"proplists.erl\"},{line,225}]},{stocks_summary,'-average_high/2-fun-0-',2,[{file,\"stocks_summary.erl\"},{line,9}]},{lists,foldl,3,[{file,\"lists.erl\"},{line,1248}]},{stocks_summary,average_high,2,[{file,\"stocks_summary.erl\"},{line,7}]},{riak_kv_w_reduce,reduce,3,[{file,\"src/riak_kv_w_reduce.erl\"},{line,207}]},{riak_kv_w_reduce,done,1,[{file,\"src/riak_kv_w_reduce.erl\"},{line,170}]},{riak_pipe_vnode_worker,wait_for_input,...},...]}",
    "input": null,
    "type": null,
    "stack": null
}
假设
从堆栈跟踪来看,reduce似乎应用于一个值,而不是一个元组列表,但奇怪的是,当我只将10-15条记录放入bucket时,它工作正常。

问题出在reduce阶段。map阶段分布在整个集群中,由许多vnode执行,这些vnode会把map阶段的结果转发给负责运行reduce的节点。由于这些结果不会同时到达,reduce阶段的函数可能会运行多次,后续每次运行的输入是上一次reduce的结果与新到达的map阶段结果拼接而成的列表。

这意味着在第二次运行时,reduce函数收到的列表中,第一个元素是上一次计算出的平均值(一个普通数字),而列表的其余元素才是您期望的JSON对象/proplist。

要解决此问题,请让reduce函数返回一个proplist,其中包含当前的平均值和到目前为止看到的值的数目。下面是一种可能性,但此示例将以对象/proplist而不是数字的形式返回MapReduce的最终结果

%% @doc Reduce-phase function: average of the `high_d' field.
%%
%% `Values' may mix two kinds of proplists: raw records carrying
%% <<"high_d">>, and partial results previously returned by this function
%% (carrying <<"average">> and <<"count">>). A partial result is weighted
%% by its count, so feeding earlier output back in as input is handled.
%%
%% Returns `[]' for an empty input list, otherwise a one-element list
%% `[[{<<"average">>, Average}, {<<"count">>, Count}]]'.
%% The second argument is ignored.
average_high([], _) ->
    %% Nothing to average; avoids dividing by a zero count below.
    [];
average_high(Values, _) ->
    {Count, Total} = lists:foldl(
        fun(Record, {Cnt, Tot}) ->
            case proplists:get_value(<<"average">>, Record) of
                undefined ->
                    %% Raw record: contributes one sample.
                    High = proplists:get_value(<<"high_d">>, Record),
                    {Cnt + 1, Tot + High};
                Ave ->
                    %% Partial result: expand it back into count * average.
                    C = proplists:get_value(<<"count">>, Record, 1),
                    {Cnt + C, Tot + (Ave * C)}
            end
        end,
        {0, 0},
        Values
    ),
    [[{<<"average">>, Total / Count}, {<<"count">>, Count}]].
 -module(stocks_summary).

 -export([average_high/2]).

 %% Reduce-phase function: arithmetic mean of the `high_d' values in Values.
 %% Every element of Values must be a proplist containing <<"high_d">>; a bare
 %% number in the list makes the proplists lookup fail with function_clause.
 %% Returns a one-element list holding the mean; the second argument is ignored.
 average_high(Values, _) ->
     AddHigh = fun(Entry, Sum) -> Sum + proplists:get_value(<<"high_d">>, Entry) end,
     [lists:foldl(AddHigh, 0, Values) / length(Values)].
# Submit the MapReduce job (identity_map:identity map phase, then the
# stocks_summary:average_high reduce phase) to the /mapred endpoint.
curl --request POST http://192.168.0.126:8098/mapred \
  --header 'Content-Type: application/json' \
  --data '{"inputs": ["stocks","goog"], "query": [{"map":{"language":"erlang","module":"identity_map","function":"identity"}}, {"reduce":{"language":"erlang","module":"stocks_summary","function":"average_high"}} ]}'
Date,Open,High,Low,Close,Volume,Adj Close
2010-05-05,500.98,515.72,500.47,509.76,4566900,509.76
2010-05-04,526.52,526.74,504.21,506.37,6076300,506.37
2010-05-03,526.50,532.92,525.08,530.60,1857800,530.60
2010-04-30,531.13,537.68,525.44,525.70,2435400,525.70
2010-04-29,533.37,536.50,526.67,532.00,3058900,532.00
2010-04-28,532.10,534.83,521.03,529.19,3406100,529.19
2010-04-27,528.95,538.33,527.23,529.06,3844700,529.06
2010-04-26,544.97,544.99,529.21,531.64,4368800,531.64
2010-04-23,547.25,549.32,542.27,544.99,2089400,544.99
2010-04-22,552.00,552.50,543.35,547.06,3280700,547.06
2010-04-21,556.46,560.25,552.16,554.30,2391500,554.30
2010-04-20,554.17,559.66,551.06,555.04,2977400,555.04
2010-04-19,548.75,553.99,545.00,550.10,3894000,550.10
2010-04-16,563.00,568.81,549.63,550.15,12235500,550.15
2010-04-15,592.17,597.84,588.29,595.30,6716700,595.30
2010-04-14,590.06,592.34,584.01,589.00,3402700,589.00
2010-04-13,572.53,588.88,571.13,586.77,3845200,586.77
2010-04-12,567.35,574.00,566.22,572.73,2352400,572.73
2010-04-09,567.49,568.77,564.00,566.22,2056600,566.22
2010-04-08,563.32,569.85,560.05,567.49,1947500,567.49
2010-04-07,567.30,568.75,561.86,563.54,2581000,563.54
{
    "date_s": "2010-04-07",
    "open_d": 567.3,
    "high_d": 568.75,
    "low_d": 561.86,
    "close_d": 563.54,
    "volume_i": 2581000,
    "adjClose_d": 563.54
}
{
    "phase": 1,
    "error": "{function_clause,[{proplists,get_value,[<<\"high_d\">>,555.621,undefined],[{file,\"proplists.erl\"},{line,225}]},{stocks_summary,'-average_high/2-fun-0-',2,[{file,\"stocks_summary.erl\"},{line,9}]},{lists,foldl,3,[{file,\"lists.erl\"},{line,1248}]},{stocks_summary,average_high,2,[{file,\"stocks_summary.erl\"},{line,7}]},{riak_kv_w_reduce,reduce,3,[{file,\"src/riak_kv_w_reduce.erl\"},{line,207}]},{riak_kv_w_reduce,done,1,[{file,\"src/riak_kv_w_reduce.erl\"},{line,170}]},{riak_pipe_vnode_worker,wait_for_input,...},...]}",
    "input": null,
    "type": null,
    "stack": null
}
%% @doc Reduce-phase function: average of the `high_d' field.
%%
%% `Values' may mix two kinds of proplists: raw records carrying
%% <<"high_d">>, and partial results previously returned by this function
%% (carrying <<"average">> and <<"count">>). A partial result is weighted
%% by its count, so feeding earlier output back in as input is handled.
%%
%% Returns `[]' for an empty input list, otherwise a one-element list
%% `[[{<<"average">>, Average}, {<<"count">>, Count}]]'.
%% The second argument is ignored.
average_high([], _) ->
    %% Nothing to average; avoids dividing by a zero count below.
    [];
average_high(Values, _) ->
    {Count, Total} = lists:foldl(
        fun(Record, {Cnt, Tot}) ->
            case proplists:get_value(<<"average">>, Record) of
                undefined ->
                    %% Raw record: contributes one sample.
                    High = proplists:get_value(<<"high_d">>, Record),
                    {Cnt + 1, Tot + High};
                Ave ->
                    %% Partial result: expand it back into count * average.
                    C = proplists:get_value(<<"count">>, Record, 1),
                    {Cnt + C, Tot + (Ave * C)}
            end
        end,
        {0, 0},
        Values
    ),
    [[{<<"average">>, Total / Count}, {<<"count">>, Count}]].
F([a,b,c,d])   
F([a,d] ++ F([c,b]))  
F([F([a]),F([c]),F([b]),F([d])])