C# net 4及更高版本中的数组边界检查效率

C# net 4及更高版本中的数组边界检查效率,c#,.net,performance,bounds-check-elimination,C#,.net,Performance,Bounds Check Elimination,我对 .NET 中底层算法的效率很感兴趣。我希望我们今后可以选择用 C# 而不是 C++ 来编写更多代码,但其中一个绊脚石是 .NET 在循环和随机访问数组时所做的边界检查。 一个有代表性的示例是这样一个函数:它计算两个数组中对应元素的乘积之和(即两个向量的点积)。 我根据 doug65536 的回答做了一些调查。在 C++ 中,我比较了带一次边界检查的循环所用的时间: for(int i=0; i<n; ++i) sum += v1[i]*v2[i]; 这比第一个版本慢了 35%,值得关注。我在这个问题上又做了更多的调查。有趣的是,似

for(int i=0; i<n; ++i) sum += v1[i]*v2[i];



  • 边界检查由一对 cmp/jae 指令组成

  • 边界检查是一个前向分支,它会被静态预测为不跳转,这也降低了成本。这个分支永远不会被执行。(如果真的执行了,无论如何都会抛出异常,因此预测失误的代价完全无关紧要)

  • 一旦出现内存延迟,推测性执行将使循环的许多迭代排队,因此解码额外指令对的成本几乎消失


64位 64位 JIT 编译器在消除边界检查方面做得很好(至少在简单的情况下)。我添加了
return sum;,然后在发布模式下使用 Visual Studio 2010 编译了程序。在下面的反汇编(我用对应的 C# 代码做了注释)中,请注意:

  • 即使您的代码将
  • 在主循环之前,有一个检查来确保 X.Length - 1 < Y.Length(否则抛出 IndexOutOfRangeException)
  • 主循环(偏移量00000032到00000052)不包含任何边界检查

; 64-bit JIT output for SumProduct(double[] X, double[] Y), annotated with the
; equivalent C#. The only range check (offsets 2d-30) runs once, before the
; loop; the loop body itself (offsets 32-52) contains no bounds check.
;
; Register assignments:
;    rcx  := i
;    rdx  := X
;    r8   := Y
;    r9   := X.Length ("length" in your code, "XLength" below)
;    r10  := Y.Length ("YLength" below)
;    r11  := X.Length - 1 ("XLengthMinus1" below)
;    xmm1 := sum
;    rax  := scratch before the loop; inside the loop it is the byte offset
;            i * 8 used to address X[i] and Y[i] (zeroed at 2b, +8 at 49)

; (Prologue)
00000000  push        rbx
00000001  push        rdi
00000002  sub         rsp,28h

; (Store arguments X and Y in rdx and r8)
00000006  mov         r8,rdx   ; Y
00000009  mov         rdx,rcx  ; X

; int XLength = X.Length;
0000000c  mov         r9,qword ptr [rdx+8]

; int XLengthMinus1 = XLength - 1;
00000010  movsxd      rax,r9d
00000013  lea         r11,[rax-1]

; int YLength = Y.Length;
00000017  mov         r10,qword ptr [r8+8]

; if (XLength != YLength)
;     throw new ArgumentException("X and Y must be same size");
0000001b  cmp         r9d,r10d
0000001e  jne         0000000000000060

; double sum = 0;
00000020  xorpd       xmm1,xmm1

; if (XLength > 0)
; {
00000024  test        r9d,r9d
00000027  jle         0000000000000054

;     int i = 0;
00000029  xor         ecx,ecx
;     (byte offset of element i also starts at 0)
0000002b  xor         eax,eax

;     if (XLengthMinus1 >= YLength)
;         throw new IndexOutOfRangeException();
0000002d  cmp         r11,r10
00000030  jae         0000000000000096

;     do
;     {
;         sum += X[i] * Y[i];
00000032  movsd       xmm0,mmword ptr [rdx+rax+10h]
00000038  mulsd       xmm0,mmword ptr [r8+rax+10h]
0000003f  addsd       xmm0,xmm1
00000043  movapd      xmm1,xmm0

;         i++;
00000047  inc         ecx
;         (advance the byte offset by one 8-byte double)
00000049  add         rax,8

;     }
;     while (i < XLength);
0000004f  cmp         ecx,r9d
00000052  jl          0000000000000032
; }

; return sum;
00000054  movapd      xmm0,xmm1

; (Epilogue)
00000058  add         rsp,28h
0000005c  pop         rdi
0000005d  pop         rbx
0000005e  ret

; (Target of the jne at 1e: throw ArgumentException; code elided)
00000060  ...

; (Target of the jae at 30: throw IndexOutOfRangeException; code elided)
00000096  ...
; 32-bit (x86) JIT output for SumProduct(double[] X, double[] Y), annotated
; with the equivalent C#. Here the range check on Y[i] (offsets 1c-1f) is
; inside the loop, so it is executed on every iteration. The running sum is
; kept on the x87 floating-point stack.
;
; Register assignments:
;    eax  := i
;    ecx  := X
;    edx  := Y
;    esi  := X.Length ("length" in your code, "XLength" below)

; (Prologue)
00000000  push        ebp
00000001  mov         ebp,esp
00000003  push        esi

; double sum = 0;
00000004  fldz

; int XLength = X.Length;
00000006  mov         esi,dword ptr [ecx+4]

; if (XLength != Y.Length)
;     throw new ArgumentException("X and Y must be same size");
00000009  cmp         dword ptr [edx+4],esi
0000000c  je          00000012
; (lengths differ: pop the 0.0 pushed by fldz, then jump to the throw code)
0000000e  fstp        st(0)
00000010  jmp         0000002F

; int i = 0;
00000012  xor         eax,eax

; if (XLength > 0)
; {
00000014  test        esi,esi
00000016  jle         0000002C

;     do
;     {
;         double temp = X[i];
00000018  fld         qword ptr [ecx+eax*8+8]

;         if (i >= Y.Length)
;             throw new IndexOutOfRangeException();
0000001c  cmp         eax,dword ptr [edx+4]
0000001f  jae         0000005A

;         sum += temp * Y[i];
00000021  fmul        qword ptr [edx+eax*8+8]
00000025  faddp       st(1),st

;         i++;
00000027  inc         eax

;     }
;     while (i < XLength);
00000028  cmp         eax,esi
0000002a  jl          00000018
; }

; return sum;
0000002c  pop         esi
0000002d  pop         ebp
0000002e  ret

; (Target of the jmp at 10: throw ArgumentException; code elided)
0000002f  ...

; (Target of the jae at 1f: throw IndexOutOfRangeException; code elided)
0000005a  ...
总结 自2009年以来,JIT 编译器有所改进,64位 JIT 编译器能够生成比32位 JIT 编译器更高效的代码





  • Foreach循环比For循环快
  • 局部变量比数组的 .Length 属性快
  • 使用 unsafe fixed 进行 GC 固定(GC-pinning)并不比普通的 For 循环更快

using System;
using System.Diagnostics;
using System.Runtime;

namespace demo
{
    /// <summary>
    /// Micro-benchmark that times four ways of scanning a byte array for a
    /// non-zero element: indexing against <c>data.Length</c>, indexing against
    /// a cached length, indexing through a pinned pointer, and <c>foreach</c>.
    /// </summary>
    class MainClass
    {
        /// <summary>
        /// Returns <c>true</c> when every byte is zero, using a <c>for</c> loop
        /// whose condition reads <c>data.Length</c> on each iteration.
        /// </summary>
        /// <param name="data">Array to scan.</param>
        /// <returns><c>false</c> as soon as a non-zero byte is found; otherwise <c>true</c>.</returns>
        static bool ByForArrayLength (byte[] data)
        {
            for (int i = 0; i < data.Length; i++)
            {
                if (data [i] != 0)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns <c>true</c> when every byte is zero, using a <c>for</c> loop
        /// bounded by a local copy of the array length.
        /// </summary>
        /// <param name="data">Array to scan.</param>
        /// <returns><c>false</c> as soon as a non-zero byte is found; otherwise <c>true</c>.</returns>
        static bool ByForLocalLength (byte[] data)
        {
            int len = data.Length;
            for (int i = 0; i < len; i++)
            {
                if (data [i] != 0)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns <c>true</c> when every byte is zero, reading the elements
        /// through a raw pointer obtained by pinning the array with <c>fixed</c>.
        /// </summary>
        /// <param name="data">Array to scan.</param>
        /// <returns><c>false</c> as soon as a non-zero byte is found; otherwise <c>true</c>.</returns>
        static unsafe bool ByForUnsafe (byte[] data)
        {
            fixed (byte* datap = data)
            {
                int len = data.Length;
                for (int i = 0; i < len; i++)
                {
                    if (datap [i] != 0)
                    {
                        return false;
                    }
                }
                return true;
            }
        }

        /// <summary>
        /// Returns <c>true</c> when every byte is zero, using <c>foreach</c>.
        /// </summary>
        /// <param name="data">Array to scan.</param>
        /// <returns><c>false</c> as soon as a non-zero byte is found; otherwise <c>true</c>.</returns>
        static bool ByForeach (byte[] data)
        {
            foreach (byte b in data)
            {
                if (b != 0)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Runs <paramref name="work"/> once and prints the elapsed wall-clock
        /// time in milliseconds, prefixed by <paramref name="description"/>.
        /// </summary>
        /// <param name="work">The action to time.</param>
        /// <param name="description">Label printed in a 40-character, left-aligned column.</param>
        static void Measure (Action work, string description)
        {
            // Set on every call, before the stopwatch starts, so it is not timed.
            GCSettings.LatencyMode = GCLatencyMode.LowLatency;
            var watch = Stopwatch.StartNew ();
            work.Invoke ();
            Console.WriteLine ("{0,-40}: {1} ms", description, watch.Elapsed.TotalMilliseconds);
        }

        /// <summary>
        /// Allocates a 256 MiB zero-filled array and times each scan variant once.
        /// </summary>
        /// <param name="args">Unused.</param>
        public static void Main (string[] args)
        {
            // A freshly allocated array is all zeros, so every scan visits
            // the whole array before returning true.
            byte[] data = new byte[256 * 1024 * 1024];
            Measure (() => ByForArrayLength (data), "For with .Length property");
            Measure (() => ByForLocalLength (data), "For with local variable");
            Measure (() => ByForUnsafe (data), "For with local variable and GC-pinning");
            Measure (() => ByForeach (data), "Foreach loop");
        }
    }
}

for(int i=0; i<n1 && i <n2 && i <n3; ++i) sum += v1[i]*v2[i];
/// <summary>
/// Computes the dot product of <paramref name="X"/> and <paramref name="Y"/>
/// by pinning both arrays with <c>fixed</c> and indexing through raw pointers,
/// so the element reads are not array accesses.
/// </summary>
/// <param name="X">First vector.</param>
/// <param name="Y">Second vector; must have the same length as <paramref name="X"/>.</param>
/// <returns>The sum of <c>X[i] * Y[i]</c> over all indices; 0 for empty arrays.</returns>
/// <exception cref="ArgumentException">The two arrays differ in length.</exception>
private static unsafe double SumProductPointer(double[] X, double[] Y)
{
    double sum = 0;
    int length = X.Length;
    if (length != Y.Length)
    {
        throw new ArgumentException("X and Y must be same size");
    }
    fixed (double* xp = X, yp = Y)
    {
        // Lengths were checked equal above, so i < length is in range for both pointers.
        for (int i = 0; i < length; i++)
        {
            sum += xp[i] * yp[i];
        }
    }
    return sum;
}
/// <summary>
/// Computes the dot product of <paramref name="X"/> and <paramref name="Y"/>
/// using ordinary array indexing.
/// </summary>
/// <param name="X">First vector.</param>
/// <param name="Y">Second vector; must have the same length as <paramref name="X"/>.</param>
/// <returns>The sum of <c>X[i] * Y[i]</c> over all indices; 0 for empty arrays.</returns>
/// <exception cref="ArgumentException">The two arrays differ in length.</exception>
private static double SumProduct(double[] X, double[] Y)
{
    double sum = 0;
    int length = X.Length;
    if (length != Y.Length)
    {
        throw new ArgumentException("X and Y must be same size");
    }
    // The loop is bounded by the cached X.Length; Y is indexed with the same i.
    for (int i = 0; i < length; i++)
    {
        sum += X[i] * Y[i];
    }
    return sum;
}

$ mcs Program.cs -optimize -unsafe
For with .Length property               : 440,9208 ms
For with local variable                 : 333,2252 ms
For with local variable and GC-pinning  : 330,2205 ms
Foreach loop                            : 280,5205 ms