Process 如何在Erlang中高效地实现MapReduce示例?

Process 如何在Erlang中高效地实现MapReduce示例?,process,erlang,message-passing,Process,Erlang,Message Passing,我试图比较并发编程语言的性能,比如Go和Erlang。以下Go代码计算平方和(重复计算平方和R次): 1^2+2^2+3^2…1024^2 尽管这对Erlang来说根本不是一项好任务,但有一个非常简单的解决方案: -module(mr). -export([start/1, start/2]). start([R, N]) -> Result = start(list_to_integer(R), list_to_integer(N)), io:format("~B x

我试图比较并发编程语言的性能,比如Go和Erlang。以下Go代码计算平方和(重复计算平方和R次):

1^2 + 2^2 + 3^2 + … + 1024^2


尽管这对Erlang来说根本不是一项好任务,但有一个非常简单的解决方案:

-module(mr).

-export([start/1, start/2]).

%% Entry point taking a two-element list of strings. Both elements are
%% converted with list_to_integer/1 and handed to start/2; the number of
%% results and the first result are then printed as "<count> x <value>".
start([RepeatArg, LimitArg]) ->
    Repeats = list_to_integer(RepeatArg),
    Limit = list_to_integer(LimitArg),
    Sums = start(Repeats, Limit),
    io:format("~B x ~B~n", [length(Sums), hd(Sums)]).

%% Computes 1*1 + 2*2 + ... + N*N with a tree of linked processes and
%% collects the total R times.
%%
%% R - number of results to collect (non-negative integer).
%% N - upper bound of the range 1..N (integer, at least 1).
%%
%% Returns a list of R totals. Calls with arguments outside those ranges
%% fail with function_clause before any process is spawned. Without the
%% N >= 1 check, a range such as 1..0 never narrows down to a single number,
%% so the worker tree would keep spawning processes without bound.
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Self = self(),
    %% Root of the worker tree; it sends {RootPid, Total} to us R times.
    Root = start(Self, R, 1, N),
    %% Selective receive: only messages tagged with the root's pid are taken.
    [ receive {Root, Total} -> Total end || _ <- lists:seq(1, R) ].

%% Spawns one linked worker for the inclusive range From..To and returns its
%% pid. A range whose bounds are exactly equal gets a mapper for that single
%% number; every other range gets a reducer.
start(Parent, R, From, To) when From =:= To ->
    Leaf = fun() -> mapper(Parent, R, From) end,
    spawn_link(Leaf);
start(Parent, R, From, To) ->
    Node = fun() -> reducer(Parent, R, From, To) end,
    spawn_link(Node).

%% Leaf worker: sends {self(), N*N} to Parent exactly R times.
%%
%% The sends are pure side effects, so lists:foreach/2 is used instead of a
%% list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
mapper(Parent, R, N) ->
    %% The message is identical on every iteration, so build it once.
    Msg = {self(), N * N},
    lists:foreach(fun(_) -> Parent ! Msg end, lists:seq(1, R)).

%% Inner worker for the inclusive range From..To. Splits the range at its
%% midpoint, starts one child worker per half, and then R times takes one
%% value from each child and sends {self(), Sum} to Parent.
%%
%% The loop exists only for its sends, so lists:foreach/2 is used instead of
%% a list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            %% Take the left child's value first, then the right child's;
            %% each receive is selective on the child's pid.
            X = receive {A, Left} -> Left end,
            Y = receive {B, Right} -> Right end,
            Parent ! {Self, X + Y}
        end,
        lists:seq(1, R)
    ).
但大部分时间都花在了 VM 启动和正常停止的开销上:

$ time erl -noshell -run mr start 1024 1024 -s erlang halt
1024 x 358438400
real    0m1.172s
user    0m4.110s
sys     0m0.150s
$ erl
1> timer:tc(fun() -> mr:start(1024, 1024) end).
{978453,
[358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400|...]}

请记住,这更像是一个优雅的解决方案,而不是一个高效的解决方案。一个高效的解决方案应该在归约(reduce)树的分支数与通信开销之间取得平衡。

尽管对于Erlang来说这根本不是一个好任务,但有一个非常简单的解决方案:

-module(mr).

-export([start/1, start/2]).

%% Entry point taking a two-element list of strings. Both elements are
%% converted with list_to_integer/1 and handed to start/2; the number of
%% results and the first result are then printed as "<count> x <value>".
start([RepeatArg, LimitArg]) ->
    Repeats = list_to_integer(RepeatArg),
    Limit = list_to_integer(LimitArg),
    Sums = start(Repeats, Limit),
    io:format("~B x ~B~n", [length(Sums), hd(Sums)]).

%% Computes 1*1 + 2*2 + ... + N*N with a tree of linked processes and
%% collects the total R times.
%%
%% R - number of results to collect (non-negative integer).
%% N - upper bound of the range 1..N (integer, at least 1).
%%
%% Returns a list of R totals. Calls with arguments outside those ranges
%% fail with function_clause before any process is spawned. Without the
%% N >= 1 check, a range such as 1..0 never narrows down to a single number,
%% so the worker tree would keep spawning processes without bound.
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Self = self(),
    %% Root of the worker tree; it sends {RootPid, Total} to us R times.
    Root = start(Self, R, 1, N),
    %% Selective receive: only messages tagged with the root's pid are taken.
    [ receive {Root, Total} -> Total end || _ <- lists:seq(1, R) ].

%% Spawns one linked worker for the inclusive range From..To and returns its
%% pid. A range whose bounds are exactly equal gets a mapper for that single
%% number; every other range gets a reducer.
start(Parent, R, From, To) when From =:= To ->
    Leaf = fun() -> mapper(Parent, R, From) end,
    spawn_link(Leaf);
start(Parent, R, From, To) ->
    Node = fun() -> reducer(Parent, R, From, To) end,
    spawn_link(Node).

%% Leaf worker: sends {self(), N*N} to Parent exactly R times.
%%
%% The sends are pure side effects, so lists:foreach/2 is used instead of a
%% list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
mapper(Parent, R, N) ->
    %% The message is identical on every iteration, so build it once.
    Msg = {self(), N * N},
    lists:foreach(fun(_) -> Parent ! Msg end, lists:seq(1, R)).

%% Inner worker for the inclusive range From..To. Splits the range at its
%% midpoint, starts one child worker per half, and then R times takes one
%% value from each child and sends {self(), Sum} to Parent.
%%
%% The loop exists only for its sends, so lists:foreach/2 is used instead of
%% a list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            %% Take the left child's value first, then the right child's;
            %% each receive is selective on the child's pid.
            X = receive {A, Left} -> Left end,
            Y = receive {B, Right} -> Right end,
            Parent ! {Self, X + Y}
        end,
        lists:seq(1, R)
    ).
但大部分时间都花在了 VM 启动和正常停止的开销上:

$ time erl -noshell -run mr start 1024 1024 -s erlang halt
1024 x 358438400
real    0m1.172s
user    0m4.110s
sys     0m0.150s
$ erl
1> timer:tc(fun() -> mr:start(1024, 1024) end).
{978453,
[358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400|...]}

请记住,这更像是一个优雅的解决方案,而不是一个高效的解决方案。一个高效的解决方案应该在归约(reduce)树的分支数与通信开销之间取得平衡。

这是一个多么优雅的解决方案。非常感谢你!
-module(mr).

-export([start/1, start/2]).

%% Entry point taking a two-element list of strings. Both elements are
%% converted with list_to_integer/1 and handed to start/2; the number of
%% results and the first result are then printed as "<count> x <value>".
start([RepeatArg, LimitArg]) ->
    Repeats = list_to_integer(RepeatArg),
    Limit = list_to_integer(LimitArg),
    Sums = start(Repeats, Limit),
    io:format("~B x ~B~n", [length(Sums), hd(Sums)]).

%% Computes 1*1 + 2*2 + ... + N*N with a tree of linked processes and
%% collects the total R times.
%%
%% R - number of results to collect (non-negative integer).
%% N - upper bound of the range 1..N (integer, at least 1).
%%
%% Returns a list of R totals. Calls with arguments outside those ranges
%% fail with function_clause before any process is spawned. Without the
%% N >= 1 check, a range such as 1..0 never narrows down to a single number,
%% so the worker tree would keep spawning processes without bound.
start(R, N) when is_integer(R), R >= 0, is_integer(N), N >= 1 ->
    Self = self(),
    %% Root of the worker tree; it sends {RootPid, Total} to us R times.
    Root = start(Self, R, 1, N),
    %% Selective receive: only messages tagged with the root's pid are taken.
    [ receive {Root, Total} -> Total end || _ <- lists:seq(1, R) ].

%% Spawns one linked worker for the inclusive range From..To and returns its
%% pid. A range whose bounds are exactly equal gets a mapper for that single
%% number; every other range gets a reducer.
start(Parent, R, From, To) when From =:= To ->
    Leaf = fun() -> mapper(Parent, R, From) end,
    spawn_link(Leaf);
start(Parent, R, From, To) ->
    Node = fun() -> reducer(Parent, R, From, To) end,
    spawn_link(Node).

%% Leaf worker: sends {self(), N*N} to Parent exactly R times.
%%
%% The sends are pure side effects, so lists:foreach/2 is used instead of a
%% list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
mapper(Parent, R, N) ->
    %% The message is identical on every iteration, so build it once.
    Msg = {self(), N * N},
    lists:foreach(fun(_) -> Parent ! Msg end, lists:seq(1, R)).

%% Inner worker for the inclusive range From..To. Splits the range at its
%% midpoint, starts one child worker per half, and then R times takes one
%% value from each child and sends {self(), Sum} to Parent.
%%
%% The loop exists only for its sends, so lists:foreach/2 is used instead of
%% a list comprehension; a comprehension in tail position would build and
%% return an R-element list that nobody reads. Returns ok.
reducer(Parent, R, From, To) ->
    Self = self(),
    Middle = (From + To) div 2,
    A = start(Self, R, From, Middle),
    B = start(Self, R, Middle + 1, To),
    lists:foreach(
        fun(_) ->
            %% Take the left child's value first, then the right child's;
            %% each receive is selective on the child's pid.
            X = receive {A, Left} -> Left end,
            Y = receive {B, Right} -> Right end,
            Parent ! {Self, X + Y}
        end,
        lists:seq(1, R)
    ).