避免C#虚拟调用的开销

避免C#虚拟调用的开销,c#,virtual-functions,micro-optimization,C#,Virtual Functions,Micro Optimization,我有一些高度优化的数学函数,需要1-2纳秒才能完成。这些函数每秒被调用数亿次,因此尽管性能已经很好,调用开销仍然是一个问题 为了保持程序的可维护性,提供这些方法的类继承了一个IMathFunction接口,以便其他对象可以直接存储特定的数学函数,并在需要时使用它 public interface IMathFunction { double Calculate(double input); double Derivate(double input); } public SomeObje

我有一些高度优化的数学函数,需要
1-2纳秒才能完成。这些函数每秒被调用数亿次,因此尽管性能已经很好,调用开销仍然是一个问题

为了保持程序的可维护性,提供这些方法的类继承了一个
IMathFunction
接口,以便其他对象可以直接存储特定的数学函数,并在需要时使用它

public interface IMathFunction
{
  double Calculate(double input);
  double Derivate(double input);
}

public SomeObject
{
  // Note: There are cases where this is mutable
  private readonly IMathFunction mathFunction_; 

  public double SomeWork(double input, double step)
  {
    var f = mathFunction_.Calculate(input);
    var dv = mathFunction_.Derivate(input);
    return f - (dv * step);
  }
}
与直接调用相比,这个接口由于使用它的代码如何使用而造成了巨大的开销。直接调用需要1-2ns,而虚拟接口调用需要8-9ns。显然,接口的存在及其虚拟调用的后续转换是该场景的瓶颈


如果可能,我希望保留可维护性和性能在实例化对象时,是否有一种方法可以将虚拟函数解析为直接调用,以便所有后续调用都能够避免开销?我假设这将涉及使用IL创建委托,但是我不知道从哪里开始。

因此这有明显的局限性,不应该在任何有接口的地方一直使用,但是如果你有一个地方需要最大化性能,你可以使用泛型:

public SomeObject<TMathFunction> where TMathFunction: struct, IMathFunction 
{
  private readonly TMathFunction mathFunction_;

  public double SomeWork(double input, double step)
  {
    var f = mathFunction_.Calculate(input);
    var dv = mathFunction_.Derivate(input);
    return f - (dv * step);
  }
}
。。。以及结构版本和抽象版本

下面是界面版本的情况。您可以看到它的效率相对较低,因为它执行两个级别的间接寻址:

    return obj.SomeWork(input, step);
sub         esp,40h  
vzeroupper  
vmovaps     xmmword ptr [rsp+30h],xmm6  
vmovaps     xmmword ptr [rsp+20h],xmm7  
mov         rsi,rcx
vmovsd      qword ptr [rsp+60h],xmm2  
vmovaps     xmm6,xmm1
mov         rcx,qword ptr [rsi+8]          ; load mathFunction_ into rcx.
vmovaps     xmm1,xmm6  
mov         r11,7FFED7980020h              ; load vtable address of the IMathFunction.Calculate function.
cmp         dword ptr [rcx],ecx  
call        qword ptr [r11]                ; call IMathFunction.Calculate function which will call the actual Calculate via vtable.
vmovaps     xmm7,xmm0
mov         rcx,qword ptr [rsi+8]          ; load mathFunction_ into rcx.
vmovaps     xmm1,xmm6  
mov         r11,7FFED7980028h              ; load vtable address of the IMathFunction.Derivate function.
cmp         dword ptr [rcx],ecx  
call        qword ptr [r11]                ; call IMathFunction.Derivate function which will call the actual Derivate via vtable.
vmulsd      xmm0,xmm0,mmword ptr [rsp+60h] ; dv * step
vsubsd      xmm7,xmm7,xmm0                 ; f - (dv * step)
vmovaps     xmm0,xmm7  
vmovaps     xmm6,xmmword ptr [rsp+30h]  
vmovaps     xmm7,xmmword ptr [rsp+20h]  
add         rsp,40h  
pop         rsi  
ret  
这是一个抽象类。它的效率稍高一些,但只是可以忽略不计:

        return obj.SomeWork(input, step);
 sub         esp,40h  
 vzeroupper  
 vmovaps     xmmword ptr [rsp+30h],xmm6  
 vmovaps     xmmword ptr [rsp+20h],xmm7  
 mov         rsi,rcx  
 vmovsd      qword ptr [rsp+60h],xmm2  
 vmovaps     xmm6,xmm1  
 mov         rcx,qword ptr [rsi+8]           ; load mathFunction_ into rcx.
 vmovaps     xmm1,xmm6  
 mov         rax,qword ptr [rcx]             ; load object type data from mathFunction_.
 mov         rax,qword ptr [rax+40h]         ; load address of vtable into rax.
 call        qword ptr [rax+20h]             ; call Calculate via offset 0x20 of vtable.
 vmovaps     xmm7,xmm0  
 mov         rcx,qword ptr [rsi+8]           ; load mathFunction_ into rcx.
 vmovaps     xmm1,xmm6  
 mov         rax,qword ptr [rcx]             ; load object type data from mathFunction_.
 mov         rax,qword ptr [rax+40h]         ; load address of vtable into rax.
 call        qword ptr [rax+28h]             ; call Derivate via offset 0x28 of vtable.
 vmulsd      xmm0,xmm0,mmword ptr [rsp+60h]  ; dv * step
 vsubsd      xmm7,xmm7,xmm0                  ; f - (dv * step)
 vmovaps     xmm0,xmm7
 vmovaps     xmm6,xmmword ptr [rsp+30h]  
 vmovaps     xmm7,xmmword ptr [rsp+20h]  
 add         rsp,40h  
 pop         rsi  
 ret  
因此,接口和抽象类都严重依赖分支目标预测来获得可接受的性能。即使这样,你也可以看到还有很多事情要做,所以最好的情况还是相对缓慢,而最坏的情况是由于预测失误导致管道停滞

最后是带有结构的通用版本。您可以看到它的效率大大提高,因为所有内容都已完全内联,因此不涉及分支预测。它还有一个很好的副作用,就是删除了其中的大部分堆栈/参数管理,因此代码变得非常紧凑:

    return obj.SomeWork(input, step);
push        rax  
vzeroupper  
movsx       rax,byte ptr [rcx+8]  
vmovaps     xmm0,xmm1  
vaddsd      xmm0,xmm0,xmm1  ; Calculate - got inlined
vmulsd      xmm1,xmm1,xmm1  ; Derivate - got inlined
vmulsd      xmm1,xmm1,xmm2  ; dv * step
vsubsd      xmm0,xmm0,xmm1  ; f - 
add         rsp,8  
ret  

我会将方法分配给代表。这允许您仍然针对接口编程,同时避免接口方法解析

public SomeObject
{
    private readonly Func<double, double> _calculate;
    private readonly Func<double, double> _derivate;

    public SomeObject(IMathFunction mathFunction)
    {
        _calculate = mathFunction.Calculate;
        _derivate = mathFunction.Derivate;
    }

    public double SomeWork(double input, double step)
    {
        var f = _calculate(input);
        var dv = _derivate(input);
        return f - (dv * step);
    }
}
公共对象
{
私有只读函数计算;
私有只读函数派生;
公共SomeObject(IMathFunction mathFunction)
{
_计算=数学函数。计算;
_导数=数学函数。导数;
}
公共双工(双输入、双步骤)
{
var f=_计算(输入);
var dv=_导数(输入);
返回f-(dv*步);
}
}

作为对@CoryNelson评论的回应,我进行了测试,以了解真正的影响。我已经密封了function类,但这似乎没有什么区别,因为我的方法不是虚拟的

用大括号减去空方法时间的测试结果(1亿次迭代的平均时间,单位为ns):

空工作方法:1.48
接口:5.69(4.21)
代表:5.78(4.30)
密封等级:2.10(0.62)
类别:2.12(0.64)

委托版本时间与接口版本的时间大致相同(具体时间因测试执行而异)。与类相反的工作速度大约快6.8倍(比较时间减去空的工作方法时间)!这意味着我建议与代表一起工作没有帮助

令我惊讶的是,我期望接口版本的执行时间要长得多。由于这种测试并不代表OP代码的确切上下文,因此其有效性受到限制

static class TimingInterfaceVsDelegateCalls
{
    const int N = 100_000_000;
    const double msToNs = 1e6 / N;

    static SquareFunctionSealed _mathFunctionClassSealed;
    static SquareFunction _mathFunctionClass;
    static IMathFunction _mathFunctionInterface;
    static Func<double, double> _calculate;
    static Func<double, double> _derivate;

    static TimingInterfaceVsDelegateCalls()
    {
        _mathFunctionClass = new SquareFunction();
        _mathFunctionClassSealed = new SquareFunctionSealed();
        _mathFunctionInterface = _mathFunctionClassSealed;
        _calculate = _mathFunctionInterface.Calculate;
        _derivate = _mathFunctionInterface.Derivate;
    }

    interface IMathFunction
    {
        double Calculate(double input);
        double Derivate(double input);
    }

    sealed class SquareFunctionSealed : IMathFunction
    {
        public double Calculate(double input)
        {
            return input * input;
        }

        public double Derivate(double input)
        {
            return 2 * input;
        }
    }

    class SquareFunction : IMathFunction
    {
        public double Calculate(double input)
        {
            return input * input;
        }

        public double Derivate(double input)
        {
            return 2 * input;
        }
    }

    public static void Test()
    {
        var stopWatch = new Stopwatch();

        stopWatch.Start();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkEmpty(i);
        }
        stopWatch.Stop();
        double emptyTime = stopWatch.ElapsedMilliseconds * msToNs;
        Console.WriteLine($"Empty Work method: {emptyTime:n2}");

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkInterface(i);
        }
        stopWatch.Stop();
        PrintResult("Interface", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkDelegate(i);
        }
        stopWatch.Stop();
        PrintResult("Delegates", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkClassSealed(i);
        }
        stopWatch.Stop();
        PrintResult("Sealed Class", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkClass(i);
        }
        stopWatch.Stop();
        PrintResult("Class", stopWatch.ElapsedMilliseconds, emptyTime);
    }

    private static void PrintResult(string text, long elapsed, double emptyTime)
    {
        Console.WriteLine($"{text}: {elapsed * msToNs:n2} ({elapsed * msToNs - emptyTime:n2})");
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkEmpty(int i)
    {
        return 0.0;
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkInterface(int i)
    {
        double f = _mathFunctionInterface.Calculate(i);
        double dv = _mathFunctionInterface.Derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkDelegate(int i)
    {
        double f = _calculate(i);
        double dv = _derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkClassSealed(int i)
    {
        double f = _mathFunctionClassSealed.Calculate(i);
        double dv = _mathFunctionClassSealed.Derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkClass(int i)
    {
        double f = _mathFunctionClass.Calculate(i);
        double dv = _mathFunctionClass.Derivate(i);
        return f - (dv * 12.34534);
    }
}
静态类计时interfacevsdelegatecalls
{
常数int N=100_000_000;
常数双mston=1e6/N;
静态SquareFunctionSealed _mathFunctionClassSealed;
静态平方函数mathFunctionClass;
静态IMathFunction\u mathFunctionInterface;
静态函数计算;
静态函数导数;
静态计时interfacevsdelegatecalls()
{
_mathFunctionClass=新的平方函数();
_mathFunctionClassSealed=新的SquareFunctionSealed();
_mathFunctionInterface=\u mathFunctionClassSealed;
_calculate=\u mathFunctionInterface.calculate;
_派生=_mathFunctionInterface.derivate;
}
接口IMathFunction
{
双计算(双输入);
双导数(双输入);
}
密封类SquareFunction密封:IMathFunction
{
公共双计算(双输入)
{
返回输入*输入;
}
公共双导数(双输入)
{
返回2*输入;
}
}
类SquareFunction:IMathFunction
{
公共双计算(双输入)
{
返回输入*输入;
}
公共双导数(双输入)
{
返回2*输入;
}
}
公共静态无效测试()
{
var stopWatch=新秒表();
秒表。开始();
对于(int i=0;istatic class TimingInterfaceVsDelegateCalls
{
    const int N = 100_000_000;
    const double msToNs = 1e6 / N;

    static SquareFunctionSealed _mathFunctionClassSealed;
    static SquareFunction _mathFunctionClass;
    static IMathFunction _mathFunctionInterface;
    static Func<double, double> _calculate;
    static Func<double, double> _derivate;

    static TimingInterfaceVsDelegateCalls()
    {
        _mathFunctionClass = new SquareFunction();
        _mathFunctionClassSealed = new SquareFunctionSealed();
        _mathFunctionInterface = _mathFunctionClassSealed;
        _calculate = _mathFunctionInterface.Calculate;
        _derivate = _mathFunctionInterface.Derivate;
    }

    interface IMathFunction
    {
        double Calculate(double input);
        double Derivate(double input);
    }

    sealed class SquareFunctionSealed : IMathFunction
    {
        public double Calculate(double input)
        {
            return input * input;
        }

        public double Derivate(double input)
        {
            return 2 * input;
        }
    }

    class SquareFunction : IMathFunction
    {
        public double Calculate(double input)
        {
            return input * input;
        }

        public double Derivate(double input)
        {
            return 2 * input;
        }
    }

    public static void Test()
    {
        var stopWatch = new Stopwatch();

        stopWatch.Start();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkEmpty(i);
        }
        stopWatch.Stop();
        double emptyTime = stopWatch.ElapsedMilliseconds * msToNs;
        Console.WriteLine($"Empty Work method: {emptyTime:n2}");

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkInterface(i);
        }
        stopWatch.Stop();
        PrintResult("Interface", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkDelegate(i);
        }
        stopWatch.Stop();
        PrintResult("Delegates", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkClassSealed(i);
        }
        stopWatch.Stop();
        PrintResult("Sealed Class", stopWatch.ElapsedMilliseconds, emptyTime);

        stopWatch.Restart();
        for (int i = 0; i < N; i++) {
            double result = SomeWorkClass(i);
        }
        stopWatch.Stop();
        PrintResult("Class", stopWatch.ElapsedMilliseconds, emptyTime);
    }

    private static void PrintResult(string text, long elapsed, double emptyTime)
    {
        Console.WriteLine($"{text}: {elapsed * msToNs:n2} ({elapsed * msToNs - emptyTime:n2})");
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkEmpty(int i)
    {
        return 0.0;
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkInterface(int i)
    {
        double f = _mathFunctionInterface.Calculate(i);
        double dv = _mathFunctionInterface.Derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkDelegate(int i)
    {
        double f = _calculate(i);
        double dv = _derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkClassSealed(int i)
    {
        double f = _mathFunctionClassSealed.Calculate(i);
        double dv = _mathFunctionClassSealed.Derivate(i);
        return f - (dv * 12.34534);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    private static double SomeWorkClass(int i)
    {
        double f = _mathFunctionClass.Calculate(i);
        double dv = _mathFunctionClass.Derivate(i);
        return f - (dv * 12.34534);
    }
}