Process 如何在Erlang中高效地实现MapReduce示例?
标签:process, erlang, message-passing
我试图比较并发编程语言(例如 Go 和 Erlang)的性能。以下 Go 代码计算平方和 1^2 + 2^2 + 3^2 + … + 1024^2,并将该计算重复 R 次。
尽管这对Erlang来说根本不是一项好任务,但有一个非常简单的解决方案:
%% Computes the sum of squares 1^2 + 2^2 + ... + N^2, R times over, using a
%% binary tree of linked processes: leaf processes ("mappers") emit N*N and
%% inner processes ("reducers") add the values received from their two
%% children and forward the sum to their parent.
-module(mr).
-export([start/1, start/2]).

%% Command-line entry point (e.g. `erl -run mr start R N`): R and N arrive as
%% strings. Prints "<number of results> x <first result>".
%% hd/1 fails on an empty result list, so R must be at least 1 here.
-spec start([string()]) -> ok.
start([R, N]) ->
    Result = start(list_to_integer(R), list_to_integer(N)),
    io:format("~B x ~B~n", [length(Result), hd(Result)]).

%% Returns a list of R results, each being 1^2 + 2^2 + ... + N^2.
%% The guard rejects N < 1 (and non-integer or negative arguments) with a
%% function_clause error: an empty range 1..N could never be split down to
%% single-element leaves and would otherwise spawn processes without bound.
-spec start(non_neg_integer(), pos_integer()) -> [integer()].
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Root = start(self(), R, 1, N),
    %% Selective receive on the root pid: collect exactly R sums from it.
    [receive {Root, Sum} -> Sum end || _ <- lists:seq(1, R)].

%% Spawns the linked process responsible for the range From..To and returns
%% its pid. A single-element range becomes a mapper; a wider range becomes a
%% reducer. The From < To guard guarantees that every split strictly shrinks
%% the range, so the recursion always reaches the mapper clause.
-spec start(pid(), non_neg_integer(), integer(), integer()) -> pid().
start(Parent, R, N, N) ->
    spawn_link(fun() -> mapper(Parent, R, N) end);
start(Parent, R, From, To) when From < To ->
    spawn_link(fun() -> reducer(Parent, R, From, To) end).

%% Leaf process body: sends {self(), N*N} to Parent R times.
mapper(Parent, R, N) ->
    Self = self(),
    Square = N * N,
    lists:foreach(fun(_) -> Parent ! {Self, Square} end, lists:seq(1, R)).

%% Inner process body: splits From..To in the middle, starts one child per
%% half, then R times receives one value from each child (matched by child
%% pid) and sends their sum to Parent tagged with this process's pid.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            Sum = receive {A, X} -> receive {B, Y} -> X + Y end end,
            Parent ! {Self, Sum}
        end,
        lists:seq(1, R)).
但大部分时间是VM启动和正常停止开销
$ time erl -noshell -run mr start 1024 1024 -s erlang halt
1024 x 358438400
real    0m1.172s
user    0m4.110s
sys     0m0.150s
$ erl
1> timer:tc(fun() -> mr:start(1024, 1024) end).
{978453,
[358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400|...]}
请记住,这更像是一个优雅的解决方案,而不是一个高效的解决方案。高效的解决方案应当在归约树的分支数与通信开销之间取得平衡。
尽管对于Erlang来说这根本不是一个好任务,但有一个非常简单的解决方案:
%% Computes the sum of squares 1^2 + 2^2 + ... + N^2, R times over, using a
%% binary tree of linked processes: leaf processes ("mappers") emit N*N and
%% inner processes ("reducers") add the values received from their two
%% children and forward the sum to their parent.
-module(mr).
-export([start/1, start/2]).

%% Command-line entry point (e.g. `erl -run mr start R N`): R and N arrive as
%% strings. Prints "<number of results> x <first result>".
%% hd/1 fails on an empty result list, so R must be at least 1 here.
-spec start([string()]) -> ok.
start([R, N]) ->
    Result = start(list_to_integer(R), list_to_integer(N)),
    io:format("~B x ~B~n", [length(Result), hd(Result)]).

%% Returns a list of R results, each being 1^2 + 2^2 + ... + N^2.
%% The guard rejects N < 1 (and non-integer or negative arguments) with a
%% function_clause error: an empty range 1..N could never be split down to
%% single-element leaves and would otherwise spawn processes without bound.
-spec start(non_neg_integer(), pos_integer()) -> [integer()].
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Root = start(self(), R, 1, N),
    %% Selective receive on the root pid: collect exactly R sums from it.
    [receive {Root, Sum} -> Sum end || _ <- lists:seq(1, R)].

%% Spawns the linked process responsible for the range From..To and returns
%% its pid. A single-element range becomes a mapper; a wider range becomes a
%% reducer. The From < To guard guarantees that every split strictly shrinks
%% the range, so the recursion always reaches the mapper clause.
-spec start(pid(), non_neg_integer(), integer(), integer()) -> pid().
start(Parent, R, N, N) ->
    spawn_link(fun() -> mapper(Parent, R, N) end);
start(Parent, R, From, To) when From < To ->
    spawn_link(fun() -> reducer(Parent, R, From, To) end).

%% Leaf process body: sends {self(), N*N} to Parent R times.
mapper(Parent, R, N) ->
    Self = self(),
    Square = N * N,
    lists:foreach(fun(_) -> Parent ! {Self, Square} end, lists:seq(1, R)).

%% Inner process body: splits From..To in the middle, starts one child per
%% half, then R times receives one value from each child (matched by child
%% pid) and sends their sum to Parent tagged with this process's pid.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            Sum = receive {A, X} -> receive {B, Y} -> X + Y end end,
            Parent ! {Self, Sum}
        end,
        lists:seq(1, R)).
但大部分时间是VM启动和正常停止开销
$ time erl -noshell -run mr start 1024 1024 -s erlang halt
1024 x 358438400
real    0m1.172s
user    0m4.110s
sys     0m0.150s
$ erl
1> timer:tc(fun() -> mr:start(1024, 1024) end).
{978453,
[358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400|...]}
请记住,这更像是一个优雅的解决方案,而不是一个高效的解决方案。高效的解决方案应当在归约树的分支数与通信开销之间取得平衡。
这是一个多么优雅的解决方案。非常感谢你!
%% Computes the sum of squares 1^2 + 2^2 + ... + N^2, R times over, using a
%% binary tree of linked processes: leaf processes ("mappers") emit N*N and
%% inner processes ("reducers") add the values received from their two
%% children and forward the sum to their parent.
-module(mr).
-export([start/1, start/2]).

%% Command-line entry point (e.g. `erl -run mr start R N`): R and N arrive as
%% strings. Prints "<number of results> x <first result>".
%% hd/1 fails on an empty result list, so R must be at least 1 here.
-spec start([string()]) -> ok.
start([R, N]) ->
    Result = start(list_to_integer(R), list_to_integer(N)),
    io:format("~B x ~B~n", [length(Result), hd(Result)]).

%% Returns a list of R results, each being 1^2 + 2^2 + ... + N^2.
%% The guard rejects N < 1 (and non-integer or negative arguments) with a
%% function_clause error: an empty range 1..N could never be split down to
%% single-element leaves and would otherwise spawn processes without bound.
-spec start(non_neg_integer(), pos_integer()) -> [integer()].
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Root = start(self(), R, 1, N),
    %% Selective receive on the root pid: collect exactly R sums from it.
    [receive {Root, Sum} -> Sum end || _ <- lists:seq(1, R)].

%% Spawns the linked process responsible for the range From..To and returns
%% its pid. A single-element range becomes a mapper; a wider range becomes a
%% reducer. The From < To guard guarantees that every split strictly shrinks
%% the range, so the recursion always reaches the mapper clause.
-spec start(pid(), non_neg_integer(), integer(), integer()) -> pid().
start(Parent, R, N, N) ->
    spawn_link(fun() -> mapper(Parent, R, N) end);
start(Parent, R, From, To) when From < To ->
    spawn_link(fun() -> reducer(Parent, R, From, To) end).

%% Leaf process body: sends {self(), N*N} to Parent R times.
mapper(Parent, R, N) ->
    Self = self(),
    Square = N * N,
    lists:foreach(fun(_) -> Parent ! {Self, Square} end, lists:seq(1, R)).

%% Inner process body: splits From..To in the middle, starts one child per
%% half, then R times receives one value from each child (matched by child
%% pid) and sends their sum to Parent tagged with this process's pid.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            Sum = receive {A, X} -> receive {B, Y} -> X + Y end end,
            Parent ! {Self, Sum}
        end,
        lists:seq(1, R)).