Multithreading t用于性能的并行

Multithreading t用于性能的并行,multithreading,delphi,parallel-processing,delphi-xe7,Multithreading,Delphi,Parallel Processing,Delphi Xe7,给定以下在一维数组中查找奇数的简单任务: begin odds := 0; Ticks := TThread.GetTickCount; for i := 0 to MaxArr-1 do if ArrXY[i] mod 2 = 0 then Inc(odds); Ticks := TThread.GetTickCount - Ticks; writeln('Serial: ' + Ticks.ToString + 'ms, odds: ' + o

给定以下在一维数组中查找奇数的简单任务:

begin
  odds := 0;
  Ticks := TThread.GetTickCount;
  for i := 0 to MaxArr-1 do
      if ArrXY[i] mod 2 = 0 then
        Inc(odds);
  Ticks := TThread.GetTickCount - Ticks;
  writeln('Serial: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
end;
看起来这将是并行处理的一个很好的候选者。因此,您可能会尝试使用以下TParallel.For版本:

begin
  odds := 0;
  Ticks := TThread.GetTickCount;
  TParallel.For(0,  MaxArr-1, procedure(I:Integer)
  begin
    if ArrXY[i] mod 2 = 0 then
      inc(odds);
  end);
  Ticks := TThread.GetTickCount - Ticks;
  writeln('Parallel - false odds: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
end;
这种并行计算的结果在两个方面有些令人惊讶:

  • 计算的赔率是错误的

  • 执行时间比串行版本长

  • 1) 是可以解释的,因为我们没有保护并发访问的赔率变量。所以为了解决这个问题,我们应该使用
    TInterlocked.Increment(赔率)取而代之

    2) 这也是可以解释的:它展示了

    理想情况下,错误共享问题的解决方案是使用局部变量来存储中间结果,并且仅在所有并行任务结束时将这些中间结果相加。 这是我真正的问题,我无法理解:有没有办法在我的匿名方法中加入一个局部变量?注意,仅仅在匿名方法体中声明局部变量是不可行的,因为每次迭代都会调用匿名方法体。如果这在某种程度上是可行的,那么在每次任务迭代结束时,有没有一种方法可以从匿名方法中获得我的中间结果

    编辑:事实上,我对计算赔率或埃文斯并不感兴趣。我只是用这个来证明效果

    出于完整性原因,这里有一个控制台应用程序演示了这些效果:

    program Project4;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.Threading, System.Classes, System.SyncObjs;
    
    const
      MaxArr = 100000000;
    
    var
      Ticks: Cardinal;
      i: Integer;
      odds: Integer;
      ArrXY: array of Integer;
    
    procedure FillArray;
    var
      i: Integer;
      j: Integer;
    begin
      SetLength(ArrXY, MaxArr);
      for i := 0 to MaxArr-1 do
          ArrXY[i]:=Random(MaxInt);
    end;
    
    procedure Parallel;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      TParallel.For(0,  MaxArr-1, procedure(I:Integer)
      begin
        if ArrXY[i] mod 2 = 0 then
          TInterlocked.Increment(odds);
      end);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Parallel: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    procedure ParallelFalseResult;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      TParallel.For(0,  MaxArr-1, procedure(I:Integer)
      begin
        if ArrXY[i] mod 2 = 0 then
          inc(odds);
      end);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Parallel - false odds: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    procedure Serial;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      for i := 0 to MaxArr-1 do
          if ArrXY[i] mod 2 = 0 then
            Inc(odds);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Serial: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    begin
      try
        FillArray;
        Serial;
        ParallelFalseResult;
        Parallel;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Readln;
    end.
    

    我想我们之前讨论过关于OmniThreadLibrary的问题。多线程解决方案的时间较长的主要原因是与实际计算所需的时间相比,
    的并行时间开销

    局部变量在这里没有任何帮助,而全局
    threadvar
    可能会解决错误共享问题。唉,在完成循环后,您可能找不到一种方法来总结所有这些跑步机

    在IIRC中,最好的方法是将任务分成合理的部分,为每次迭代处理一系列数组条目,并增加一个专用于该部分的变量。仅此一点并不能解决错误共享问题,因为即使变量恰好是同一缓存线的一部分,也会发生错误共享问题

    另一种解决方案可能是编写一个类,该类以串行方式处理数组的给定片段,并行处理该类的多个实例,然后评估结果

    顺便说一句:你的代码不计算赔率——它只计算偶数


    并且:有一个名为
    Odd
    的内置函数,通常比您正在使用的
    mod
    代码性能更好。

    关于使用局部变量收集和,然后在最后收集它们的任务,您可以为此使用单独的数组:

    var
      sums: array of Integer;
    begin
      SetLength(sums, MaxArr);
      for I := 0 to MaxArr-1 do
        sums[I] := 0;
    
      Ticks := TThread.GetTickCount;
      TParallel.For(0, MaxArr-1,
        procedure(I:Integer)
        begin
          if ArrXY[i] mod 2 = 0 then
            Inc(sums[I]);
        end
      );
      Ticks := TThread.GetTickCount - Ticks;
    
      odds := 0;
      for I := 0 to MaxArr-1 do
        Inc(odds, sums[i]);
    
      writeln('Parallel - false odds: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    

    解决这个问题的关键是尽可能少地进行正确的分区和共享

    使用此代码,它的运行速度几乎是串行代码的4倍

    const 
      WorkerCount = 4;
    
    function GetWorker(index: Integer; const oddsArr: TArray<Integer>): TProc;
    var
      min, max: Integer;
    begin
      min := MaxArr div WorkerCount * index;
      if index + 1 < WorkerCount then
        max := MaxArr div WorkerCount * (index + 1) - 1
      else
        max := MaxArr - 1;
      Result :=
        procedure
        var
          i: Integer;
          odds: Integer;
        begin
          odds := 0;
          for i := min to max do
            if Odd(ArrXY[i]) then
              Inc(odds);
          oddsArr[index] := odds;
        end;
    end;
    
    procedure Parallel;
    var
      i: Integer;
      oddsArr: TArray<Integer>;
      workers: TArray<ITask>;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      SetLength(oddsArr, WorkerCount);
      SetLength(workers, WorkerCount);
    
      for i := 0 to WorkerCount-1 do
        workers[i] := TTask.Run(GetWorker(i, oddsArr));
      TTask.WaitForAll(workers);
    
      for i := 0 to WorkerCount-1 do
        Inc(odds, oddsArr[i]);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Parallel: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    const
    工人计数=4;
    函数GetWorker(索引:整数;常量oddsar:TArray):TProc;
    变量
    最小值,最大值:整数;
    开始
    最小值:=MaxArr div WorkerCount*索引;
    如果索引+1<工作计数,则
    max:=MaxArr div WorkerCount*(索引+1)-1
    其他的
    max:=MaxArr-1;
    结果:=
    程序
    变量
    i:整数;
    赔率:整数;
    开始
    赔率:=0;
    对于i:=最小到最大do
    如果为奇数(ArrXY[i]),则
    公司(赔率);
    oddsArr[指数]:=赔率;
    结束;
    结束;
    程序并行;
    变量
    i:整数;
    奥德萨尔:焦油;
    工人:柏油;
    开始
    赔率:=0;
    Ticks:=TThread.GetTickCount;
    设置长度(ODDSAR,工作计数);
    设置长度(工人、工人计数);
    对于i:=0到WorkerCount-1 do
    workers[i]:=TTask.Run(GetWorker(i,oddsar));
    TTask.WaitForAll(工人);
    对于i:=0到WorkerCount-1 do
    股份有限公司(赔率,奥德萨尔[i]);
    Ticks:=TThread.GetTickCount-Ticks;
    writeln('Parallel:'+Ticks.ToString+'ms,赔率:'+bits.ToString);
    结束;
    
    您可以使用TParallel.For编写类似的代码,但它的运行速度仍然比仅使用TTask要慢一些(比如比串行快3倍)

    顺便说一句,我使用该函数返回worker TProc以获得正确的索引捕获。如果在同一例程中的循环中运行它,则捕获循环变量

    更新日期:2014年12月19日:

    由于我们发现关键在于正确的分区,因此可以很容易地将其放入并行for循环,而无需将其锁定在特定的数据结构上:

    procedure ParallelFor(lowInclusive, highInclusive: Integer;
      const iteratorRangeEvent: TProc<Integer, Integer>);
    
      procedure CalcPartBounds(low, high, count, index: Integer;
        out min, max: Integer);
      var
        len: Integer;
      begin
        len := high - low + 1;
        min := (len div count) * index;
        if index + 1 < count then
          max := len div count * (index + 1) - 1
        else
          max := len - 1;
      end;
    
      function GetWorker(const iteratorRangeEvent: TProc<Integer, Integer>;
        min, max: Integer): ITask;
      begin
        Result := TTask.Run(
          procedure
          begin
            iteratorRangeEvent(min, max);
          end)
      end;
    
    var
      workerCount: Integer;
      workers: TArray<ITask>;
      i, min, max: Integer;
    begin
      workerCount := TThread.ProcessorCount;
      SetLength(workers, workerCount);
      for i := 0 to workerCount - 1 do
      begin
        CalcPartBounds(lowInclusive, highInclusive, workerCount, i, min, max);
        workers[i] := GetWorker(iteratorRangeEvent, min, max);
      end;
      TTask.WaitForAll(workers);
    end;
    
    procedure Parallel4;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      ParallelFor(0, MaxArr-1,
        procedure(min, max: Integer)
        var
          i, n: Integer;
        begin
          n := 0;
          for i := min to max do
            if Odd(ArrXY[i]) then
              Inc(n);
          AtomicIncrement(odds, n);
        end);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('ParallelEx: Stefan Glienke ' + Ticks.ToString + ' ms, odds: ' + odds.ToString);
    end;
    
    procedure-ParallelFor(低包容、高包容:整数;
    常量iteratorRangeEvent:TProc);
    过程CalcPartBounds(低、高、计数、索引:整数;
    输出最小值,最大值:整数);
    变量
    len:整数;
    开始
    len:=高-低+1;
    最小值:=(len div count)*索引;
    如果索引+1<计数,则
    最大值:=len div count*(索引+1)-1
    其他的
    最大值:=len-1;
    结束;
    函数GetWorker(const iteratorRangeEvent:TProc;
    最小值,最大值:整数):ITask;
    开始
    结果:=TTask.Run(
    程序
    开始
    迭代器范围事件(最小值、最大值);
    (完)
    结束;
    变量
    workerCount:整数;
    工人:柏油;
    i、 最小值,最大值:整数;
    开始
    workerCount:=TThread.ProcessorCount;
    设置长度(工人、工人计数);
    对于i:=0到workerCount-1 do
    开始
    CalcPartBounds(低包容性、高包容性、工作计数、i、最小值、最大值);
    workers[i]:=GetWorker(iteratorRangeEvent,min,max);
    结束;
    TTask.WaitForAll(工人);
    结束;
    程序并行4;
    开始
    赔率:=0;
    Ticks:=TThread.GetTickCount;
    (0,MaxArr-1,
    过程(最小值、最大值:整数)
    变量
    i、 n:整数;
    开始
    n:=0;
    对于i:=最小到最大do
    如果为奇数(ArrXY[i]),则
    公司(n),;
    原子增量(赔率,n);
    (完),;
    Ticks:=TThread.GetTickCount-Ticks;
    writeln('ParallelEx:Stefan Glienke'+Ticks.ToString+'ms,赔率:'+赔率.ToString
    
    function CountParallelOTL: integer;
    var
      counters: array of integer;
      numCores: integer;
      i: integer;
    begin
      numCores := Environment.Process.Affinity.Count;
      SetLength(counters, numCores);
      FillChar(counters[0], Length(counters) * SizeOf(counters[0]), 0);
    
      Parallel.For(0, MaxArr - 1)
        .NumTasks(numCores)
        .Execute(
          procedure(taskIndex, value: integer)
          begin
            if Odd(ArrXY[value]) then
              Inc(counters[taskIndex]);
          end);
    
      Result := counters[0];
      for i := 1 to numCores - 1 do
        Inc(Result, counters[i]);
    end;
    
    program ParallelCount;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SyncObjs,
      System.Classes,
      System.SysUtils,
      System.Threading,
      DSiWin32,
      OtlCommon,
      OtlParallel;
    
    const
      MaxArr = 100000000;
    
    var
      Ticks: Cardinal;
      i: Integer;
      odds: Integer;
      ArrXY: array of Integer;
    
    procedure FillArray;
    var
      i: Integer;
      j: Integer;
    begin
      SetLength(ArrXY, MaxArr);
      for i := 0 to MaxArr-1 do
        ArrXY[i]:=Random(MaxInt);
    end;
    
    function CountSerial: integer;
    var
      odds: integer;
    begin
      odds := 0;
      for i := 0 to MaxArr-1 do
          if Odd(ArrXY[i]) then
            Inc(odds);
      Result := odds;
    end;
    
    function CountParallelOTL: integer;
    var
      counters: array of integer;
      numCores: integer;
      i: integer;
    begin
      numCores := Environment.Process.Affinity.Count;
      SetLength(counters, numCores);
      FillChar(counters[0], Length(counters) * SizeOf(counters[0]), 0);
    
      Parallel.For(0, MaxArr - 1)
        .NumTasks(numCores)
        .Execute(
          procedure(taskIndex, value: integer)
          begin
            if Odd(ArrXY[value]) then
              Inc(counters[taskIndex]);
          end);
    
      Result := counters[0];
      for i := 1 to numCores - 1 do
        Inc(Result, counters[i]);
    end;
    
    function GetWorker(index: Integer; const oddsArr: TArray<Integer>; workerCount: integer): TProc;
    var
      min, max: Integer;
    begin
      min := MaxArr div workerCount * index;
      if index + 1 < workerCount then
        max := MaxArr div workerCount * (index + 1) - 1
      else
        max := MaxArr - 1;
      Result :=
        procedure
        var
          i: Integer;
          odds: Integer;
        begin
          odds := 0;
          for i := min to max do
            if Odd(ArrXY[i]) then
              Inc(odds);
          oddsArr[index] := odds;
        end;
    end;
    
    function CountParallelXE7Tasks: integer;
    var
      i: Integer;
      oddsArr: TArray<Integer>;
      workers: TArray<ITask>;
      workerCount: integer;
    begin
      workerCount := Environment.Process.Affinity.Count;
      odds := 0;
      Ticks := TThread.GetTickCount;
      SetLength(oddsArr, workerCount);
      SetLength(workers, workerCount);
    
      for i := 0 to workerCount-1 do
        workers[i] := TTask.Run(GetWorker(i, oddsArr, workerCount));
      TTask.WaitForAll(workers);
    
      for i := 0 to workerCount-1 do
        Inc(odds, oddsArr[i]);
      Result := odds;
    end;
    
    function CountParallelXE7For: integer;
    var
      odds: integer;
    begin
      odds := 0;
      TParallel.For(0,  MaxArr-1, procedure(I:Integer)
      begin
        if Odd(ArrXY[i]) then
          TInterlocked.Increment(odds);
      end);
      Result := odds;
    end;
    
    procedure Count(const name: string; func: TFunc<integer>);
    var
      time: int64;
      cnt: integer;
    begin
      time := DSiTimeGetTime64;
      cnt := func();
      time := DSiElapsedTime64(time);
      Writeln(name, ': ', cnt, ' odd elements found in ', time, ' ms');
    end;
    
    begin
      try
        FillArray;
    
        Count('Serial', CountSerial);
        Count('Parallel (OTL)', CountParallelOTL);
        Count('Parallel (XE7 tasks)', CountParallelXE7Tasks);
        Count('Parallel (XE7 for)', CountParallelXE7For);
    
        Readln;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
    end.
    
    program Project4;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      System.SysUtils, System.Threading, System.Classes, System.SyncObjs;
    
    const
      MaxArr = 100000000;
    
    var
      Ticks: Cardinal;
      i: Integer;
      odds: Integer;
      ArrXY: TArray<Integer>;
    
    type
    
    TParallelEx<TSource, TResult> = class
      private
        class function GetWorker(body: TFunc<TArray<TSource>, Integer, Integer, TResult>; source: TArray<TSource>; min, max: Integer): TFunc<TResult>;
      public
        class procedure &For(source: TArray<TSource>;
                             body: TFunc<TArray<TSource>, Integer, Integer, TResult>;
                             aggregator: TProc<TResult>);
      end;
    
    procedure FillArray;
    var
      i: Integer;
      j: Integer;
    begin
      SetLength(ArrXY, MaxArr);
      for i := 0 to MaxArr-1 do
          ArrXY[i]:=Random(MaxInt);
    end;
    
    procedure Parallel;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      TParallel.For(0,  MaxArr-1, procedure(I:Integer)
      begin
        if ArrXY[i] mod 2 <> 0 then
          TInterlocked.Increment(odds);
      end);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Parallel: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    procedure Serial;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      for i := 0 to MaxArr-1 do
          if ArrXY[i] mod 2 <> 0 then
            Inc(odds);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Serial: ' + Ticks.ToString + 'ms, odds: ' + odds.ToString);
    end;
    
    const
      WorkerCount = 4;
    
    function GetWorker(index: Integer; const oddsArr: TArray<Integer>): TProc;
    var
      min, max: Integer;
    begin
      min := MaxArr div WorkerCount * index;
      if index + 1 < WorkerCount then
        max := MaxArr div WorkerCount * (index + 1) - 1
      else
        max := MaxArr - 1;
      Result :=
        procedure
        var
          i: Integer;
          odds: Integer;
        begin
          odds := 0;
          for i := min to max do
            if ArrXY[i] mod 2 <> 0 then
              Inc(odds);
          oddsArr[index] := odds;
        end;
    end;
    
    procedure Parallel2;
    var
      i: Integer;
      oddsArr: TArray<Integer>;
      workers: TArray<ITask>;
    begin
      odds := 0;
      Ticks := TThread.GetTickCount;
      SetLength(oddsArr, WorkerCount);
      SetLength(workers, WorkerCount);
    
      for i := 0 to WorkerCount-1 do
        workers[i] := TTask.Run(GetWorker(i, oddsArr));
      TTask.WaitForAll(workers);
    
      for i := 0 to WorkerCount-1 do
        Inc(odds, oddsArr[i]);
      Ticks := TThread.GetTickCount - Ticks;
      writeln('Parallel: Stefan Glienke ' + Ticks.ToString + ' ms, odds: ' + odds.ToString);
    end;
    
    procedure parallel3;
    var
      sum: Integer;
    begin
      Ticks := TThread.GetTickCount;
      TParallelEx<Integer, Integer>.For( ArrXY,
         function(Arr: TArray<Integer>; min, max: Integer): Integer
          var
            i: Integer;
            res: Integer;
          begin
            res := 0;
            for i := min to max do
              if Arr[i] mod 2 <> 0 then
                Inc(res);
            Result := res;
          end,
          procedure(res: Integer) begin sum := sum + res; end );
      Ticks := TThread.GetTickCount - Ticks;
      writeln('ParallelEx: Markus Joos ' + Ticks.ToString + ' ms, odds: ' + odds.ToString);
    end;
    
    { TParallelEx<TSource, TResult> }
    
    class function TParallelEx<TSource, TResult>.GetWorker(body: TFunc<TArray<TSource>, Integer, Integer, TResult>; source: TArray<TSource>; min, max: Integer): TFunc<TResult>;
    begin
      Result := function: TResult
      begin
        Result := body(source, min, max);
      end;
    end;
    
    class procedure TParallelEx<TSource, TResult>.&For(source: TArray<TSource>;
      body: TFunc<TArray<TSource>, Integer, Integer, TResult>;
      aggregator: TProc<TResult>);
    var
      I: Integer;
      workers: TArray<IFuture<TResult>>;
      workerCount: Integer;
      min, max: integer;
      MaxIndex: Integer;
    begin
      workerCount := TThread.ProcessorCount;
      SetLength(workers, workerCount);
      MaxIndex := length(source);
      for I := 0 to workerCount -1 do
      begin
        min := (MaxIndex div WorkerCount) * I;
        if I + 1 < WorkerCount then
          max := MaxIndex div WorkerCount * (I + 1) - 1
        else
          max := MaxIndex - 1;
        workers[i]:= TTask.Future<TResult>(GetWorker(body, source, min, max));
      end;
      for i:= 0 to workerCount-1 do
      begin
        aggregator(workers[i].Value);
      end;
    end;
    
    begin
      try
        FillArray;
        Serial;
        Parallel;
        Parallel2;
        Parallel3;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Readln;
    end.