C++ 整数运算符！=和==与零比较时_C++_Performance_Assembly_Machine Code

C++ 整数运算符！=和==与零比较时

c++ performance assembly

C++ 整数运算符！=和==与零比较时,c++,performance,assembly,machine-code,C++,Performance,Assembly,Machine Code,我找到了和==不是测试零或非零的最快方法 bool nonZero1 = integer != 0; xor eax, eax test ecx, ecx setne al bool nonZero2 = integer < 0 || integer > 0; test ecx, ecx setne al bool zero1 = integer == 0; xor eax, eax test ecx, ecx sete al bool zero2 = !(integer &l

我发现 != 和 == 并不是测试零或非零的最快方法

bool nonZero1 = integer != 0;
xor eax, eax
test ecx, ecx
setne al

bool nonZero2 = integer < 0 || integer > 0;
test ecx, ecx
setne al

bool zero1 = integer == 0;
xor eax, eax
test ecx, ecx
sete al

bool zero2 = !(integer < 0 || integer > 0);
test ecx, ecx
sete al

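bool nonZero1 = integer != 0;
xor eax, eax
test ecx, ecx
setne al

bool nonZero2 = integer < 0 || integer > 0;
test ecx, ecx
setne al

bool zero1 = integer == 0;
xor eax, eax
test ecx, ecx
sete al

bool zero2 = !(integer < 0 || integer > 0);
test ecx, ecx
sete al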

编译器：VC++11 优化标志：/O2/GL/LTCG

这是 x86-32 的汇编输出。两种比较的第二个版本在 x86-32 和 x86-64 上都快了约 12%。然而，在 x86-64 上生成的指令完全相同（第一个版本看起来与第二个版本一模一样），但第二个版本仍然更快

为什么编译器不能在x86-32上生成更快的版本

当汇编输出相同时，为什么 x86-64 上的第二个版本速度更快

编辑：我添加了基准测试代码。zero：1544ms、1358ms；nonZero：1544ms、1358ms（基准测试代码的链接在此处缺失）

注意：在单个源文件中编译时，可能很难找到这些函数，因为main.asm非常大。我在一个单独的源文件中有zero1，zero2，nonZero1，nonZero2

EDIT2：同时安装了VC++11和VC++2010的人是否可以运行基准测试代码并公布时间？这可能确实是VC++11中的一个bug。

这是一个很好的问题，但我认为您已经成为编译器依赖性分析的受害者

编译器只需清除

eax

的高位一次，到第二个版本时它们仍保持清零状态。第二个版本本来也得付出 xor eax, eax 的代价，只是编译器的分析证明了 eax 已被第一个版本清零

第二个版本可以通过利用编译器在第一个版本中所做的工作来“作弊”。

你是如何测量时间的？它是在循环中的“（版本一，后跟版本二）”，还是在循环中的“（版本一）后跟（版本二）”

不要在同一个程序中同时进行这两个测试（而是为每个版本分别重新编译）；如果一定要这样做，请同时测试“版本 A 在前”和“版本 B 在前”两种顺序，看看是否总是排在前面的那个要付出额外代价

作弊的例子：

timer1.start();
double x1 = 2 * sqrt(n + 37 * y + exp(z));
timer1.stop();
timer2.start();
double x2 = 31 * sqrt(n + 37 * y + exp(z));
timer2.stop();

如果

timer2

duration小于

timer1

duration，我们不能得出乘以31比乘以2快的结论。相反，我们意识到编译器执行了公共子表达式分析，代码变成：

timer1.start();
double common = sqrt(n + 37 * y + exp(z));
double x1 = 2 * common;
timer1.stop();
timer2.start();
double x2 = 31 * common;
timer2.stop();

唯一被证明的是，乘以31比计算

common

要快。这一点也不奇怪——乘法远比

sqrt

和

exp

快

编辑：我看了 OP 针对我的代码给出的汇编列表。现在我怀疑这甚至不是 VS2011 的一个普遍缺陷，而可能只是 OP 的代码触发的一个特例 bug。我用 Clang 3.2、GCC 4.6.2 和 VS2010 原样运行了 OP 的代码，在所有情况下，最大差异都只有约 1%

我刚刚对自己的 ne.c 文件做了适当修改，并使用 /O2 和 /GL 标志编译了源代码。以下是源代码：

/* Returns 1 when n is non-zero, 0 otherwise, using the direct != form. */
int ne1(int n) {
 return n != 0;
 }

 /* Same result as ne1, but spelled as two relational tests joined by ||. */
 int ne2(int n) {
 return n < 0 || n > 0;
 }

 /* Same result as ne1, written as the negation of an equality test. */
 int ne3(int n) {
 return !(n == 0);
 }

/* Calls each variant once with a rand() argument; p, q and r are never read.
 * NOTE(review): no #include <stdlib.h> is visible in this snippet, so rand()
 * is used without a declaration here, and main has no explicit return. */
int main() { int p = ne1(rand()), q = ne2(rand()), r = ne3(rand());}

使用 <、> 和 || 运算符的 ne2() 显然开销更大；ne1() 和 ne3() 分别使用 != 和 == 运算符，写法更简洁，且二者等价。
Visual Studio 2011 仍处于 Beta 测试阶段，我认为这是一个 bug。我用另外两个编译器（GCC 4.6.2 和 Clang 3.2）加上 O2 优化开关做了测试，在我的 Windows 7 机器上，全部三个测试都生成了完全相同的汇编。以下是总结：
$ cat ne.c

/* Three small predicates on an int, each written with a different source form. */
#include <stdbool.h>
/* True when n is non-zero (direct != comparison). */
bool ne1(int n) {
    return n != 0;
}

/* True when n is non-zero (n is below zero or above zero). */
bool ne2(int n) {
    return n < 0 || n > 0;
}

/* Negation of (n != 0): true when n IS zero, i.e. the opposite of ne1/ne2. */
bool ne3(int n) {
    return !(n != 0);
}

/* Empty main: none of the three functions is called from this file. */
int main() {}

使用 Clang 编译的结果：
    # x86-32 compiler listing in AT&T syntax (source operand first, % on registers).
    # .def/.scl/.type/.endef describe the symbol for the COFF symbol table
    # (storage class 2, type 32 -- presumably "external" + "function"; see PE/COFF spec).
    .def     _ne1;
    .scl    2;
    .type   32;
    .endef
    .text
    .globl  _ne1
    # Align the entry point to 16 bytes, padding with 0x90 bytes.
    .align  16, 0x90
# _ne1: eax = (value at 4(%esp) != 0). Presumably 4(%esp) holds the int argument n.
_ne1:
    # Compare the 32-bit stack operand against zero directly in memory.
    cmpl    $0, 4(%esp)
    # al = 1 if the operand was non-zero, else 0.
    setne   %al
    # Zero-extend al so the whole of eax holds 0 or 1.
    movzbl  %al, %eax
    ret

    .def     _ne2;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne2
    .align  16, 0x90
# _ne2: instruction sequence identical to _ne1 above.
_ne2:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    .def     _ne3;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne3
    .align  16, 0x90
# _ne3: same shape as _ne1/_ne2 but with sete, so eax = (operand == 0).
_ne3:
    cmpl    $0, 4(%esp)
    # al = 1 if the operand was zero, else 0.
    sete    %al
    movzbl  %al, %eax
    ret

    .def     _main;
    .scl    2;
    .type   32;
    .endef
    .globl  _main
    .align  16, 0x90
# _main: sets up a frame, calls ___main, returns 0.
_main:
    pushl   %ebp
    movl    %esp, %ebp
    # ___main is defined outside this listing (presumably runtime start-up code).
    calll   ___main
    # Return value 0.
    xorl    %eax, %eax
    popl    %ebp
    ret

我的建议是将此作为bug提交。
注释：我把它们编译成C源代码，因为我不认为使用相应的C++编译器会在这里做任何重大的改变。
你能提供你用来评估性能的完整程序吗？如果只是跳过了 xor，它又如何保证 eax 的其余位为零？这些 xor 看起来与测试本身无关，应该属于周围代码的一部分。如果调换顺序会怎样？编译器足够聪明，知道在第一次测试之前已经对 eax 做过 xor，并且到下一次测试时这一点仍然成立……NFRCR，你真的是把它们当作线性代码来做基准测试的吗？我猜你只是把它们粘贴在一起以控制帖子篇幅。已添加基准测试代码。我分别运行了 benchmark1 和 benchmark2，结果相同。唯一的区别是，无论哪个基准测试先运行，它都需要“预热”，因此会稍慢一些。这有点离题，但编译器难道不会把乘以 31 优化成 (common << 5) - common 吗？@Matt：浮点乘法不会 ;) 对于整数乘法，是的，我想大多数编译器都知道这个技巧，但取决于架构，它可能更快，也可能不会更快。IMUL 乘以 2 几乎肯定会被转换为左移。你的新测试有问题：编译器执行了常量传播，因为它确定 n 总是等于 10；此外，由于结果未被使用且没有副作用，它把函数调用完全消除了。@dirkgently：涉及优化器问题时，上下文就是一切。这不是 bug！如果编译出的代码行为符合预期，它怎么可能是 bug 呢？这只说明优化器还有改进空间，但每个优化器都有改进空间。（顺便说一下，这是一条定理。）Visual C++ 的 bug 可以提交报告。
# x86-32 compiler listing in AT&T syntax with .cfi_* call-frame-information directives.
# _ne1: al = (value at 4(%esp) != 0). Presumably 4(%esp) holds the int argument n.
_ne1:
LFB0:
    .cfi_startproc
    # Load the stack operand into eax, then test it against itself to set ZF.
    movl    4(%esp), %eax
    testl   %eax, %eax
    # Only al is written; the upper bytes of eax keep the loaded value's bits.
    setne   %al
    ret
    .cfi_endproc
LFE0:
    # Align to 4 bytes, skipping at most 3 bytes of padding.
    .p2align 2,,3
    .globl  _ne2
    .def    _ne2;   .scl    2;  .type   32; .endef
# _ne2: same test as _ne1, but the operand is loaded into edx instead of eax.
_ne2:
LFB1:
    .cfi_startproc
    movl    4(%esp), %edx
    testl   %edx, %edx
    # Only al is written; the rest of eax is not touched in this function.
    setne   %al
    ret
    .cfi_endproc
LFE1:
    .p2align 2,,3
    .globl  _ne3
    .def    _ne3;   .scl    2;  .type   32; .endef
# _ne3: uses sete, so al = (operand == 0); the operand is loaded into ecx.
_ne3:
LFB2:
    .cfi_startproc
    movl    4(%esp), %ecx
    testl   %ecx, %ecx
    sete    %al
    ret
    .cfi_endproc
LFE2:
    .def    ___main;    .scl    2;  .type   32; .endef
    # main is placed in its own section, .text.startup.
    .section    .text.startup,"x"
    .p2align 2,,3
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
# _main: sets up a frame, aligns the stack, calls ___main, returns 0.
_main:
LFB3:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    # Round esp down to a multiple of 16 before the call.
    andl    $-16, %esp
    # ___main is only declared in this listing; its body is not shown.
    call    ___main
    # Return value 0.
    xorl    %eax, %eax
    # leave = movl %ebp, %esp ; popl %ebp -- undoes the frame and the alignment.
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE3:

    # x86-32 compiler listing in AT&T syntax; each function is preceded by a
    # .def/.scl/.type/.endef group describing its symbol for the COFF symbol table.
    .def     _ne1;
    .scl    2;
    .type   32;
    .endef
    .text
    .globl  _ne1
    # 16-byte alignment, padded with 0x90 bytes.
    .align  16, 0x90
# _ne1: eax = (value at 4(%esp) != 0), with the result zero-extended to 32 bits.
_ne1:
    cmpl    $0, 4(%esp)
    setne   %al
    # Widen the 0/1 in al to all of eax.
    movzbl  %al, %eax
    ret

    .def     _ne2;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne2
    .align  16, 0x90
# _ne2: same three instructions as _ne1.
_ne2:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    .def     _ne3;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne3
    .align  16, 0x90
# _ne3: sete instead of setne, so eax = (value at 4(%esp) == 0).
_ne3:
    cmpl    $0, 4(%esp)
    sete    %al
    movzbl  %al, %eax
    ret

    .def     _main;
    .scl    2;
    .type   32;
    .endef
    .globl  _main
    .align  16, 0x90
# _main: frame setup, one call to ___main (not defined in this listing), return 0.
_main:
    pushl   %ebp
    movl    %esp, %ebp
    calll   ___main
    # eax = 0 is the return value.
    xorl    %eax, %eax
    popl    %ebp
    ret