SSE2 MOVDDUP不移动值

SSE2 MOVDUP不移动值,d,sse,sse2,D,Sse,Sse2,有人能解释一下为什么这个程序的输出是[nan,nan]?代码应该将d的值加载到XMM1寄存器的高64位和低64位,然后将XMM1的内容移动到a中。由于a未初始化为一组特定值,因此D将每个元素初始化为nan。如果movupd指令不在objdump中,我会理解结果,但指令在那里。想法 import std.stdio; void main() { enum double d = 1.0 / cast(double)2; double[] a = new double[2];

有人能解释一下为什么这个程序的输出是 `[nan, nan]` 吗?这段代码本应将 `d` 的值同时加载到 XMM1 寄存器的高 64 位和低 64 位,然后将 XMM1 的内容存入 `a`。由于 `a` 没有被显式初始化为特定的值,D 会将其每个元素默认初始化为 `nan`。如果 objdump 的输出中没有 `movupd` 指令,我还能理解这个结果,但这条指令确实在那里。有什么想法吗?

import std.stdio;

// Minimal reproduction: attempts to broadcast the constant `d` into both
// 64-bit lanes of XMM1 and store the 128-bit result into the two-element
// array `a` through its raw pointer, then prints `a`.
// Reported output is [nan, nan] instead of the expected [0.5, 0.5].
void main()
{
    enum double d = 1.0 / cast(double)2; // compile-time constant, 0.5
    double[] a = new double[2];          // elements are default-initialized to NaN by D
    auto aptr = a.ptr;                   // raw pointer to the first element of `a`

    asm
    {
        // Duplicate the 64-bit value of `d` into the low and high halves of XMM1.
        movddup XMM1, d;
        // Intended: unaligned 128-bit store to the memory `aptr` points at.
        // NOTE(review): the disassembly shown for this program has the store
        // targeting a stack slot (movupd %xmm1,-0x30(%rbp)), which looks like the
        // `aptr` variable itself rather than the array data; that would leave `a`
        // untouched. Presumably `[aptr]` is treated as a direct reference to the
        // local; loading `aptr` into a register first is the reported workaround
        // -- confirm against the compiler version in use.
        movupd [aptr], XMM1;
    }
    writeln(a);
}
以下是主函数的objdump:

0000000000426b88 <_Dmain>:
  426b88:       55                      push   %rbp
  426b89:       48 8b ec                mov    %rsp,%rbp
  426b8c:       48 83 ec 50             sub    $0x50,%rsp
  426b90:       f2 48 0f 10 05 77 81    rex.W movsd 0x28177(%rip),%xmm0
  426b97:       02 00 
  426b99:       f2 48 0f 11 45 b0       rex.W movsd %xmm0,-0x50(%rbp)
  426b9f:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426ba6:       00 00 00 
  426ba9:       f2 48 0f 10 05 66 81    rex.W movsd 0x28166(%rip),%xmm0
  426bb0:       02 00 
  426bb2:       48 8d 7d c0             lea    -0x40(%rbp),%rdi
  426bb6:       e8 65 d1 00 00          callq  433d20 <_memsetDouble>
  426bbb:       f2 48 0f 10 0d 4c 81    rex.W movsd 0x2814c(%rip),%xmm1
  426bc2:       02 00 
  426bc4:       f2 48 0f 11 4d c0       rex.W movsd %xmm1,-0x40(%rbp)
  426bca:       f2 48 0f 10 15 3d 81    rex.W movsd 0x2813d(%rip),%xmm2
  426bd1:       02 00 
  426bd3:       f2 48 0f 11 55 c8       rex.W movsd %xmm2,-0x38(%rbp)
  426bd9:       48 8d 45 c0             lea    -0x40(%rbp),%rax
  426bdd:       48 89 45 d0             mov    %rax,-0x30(%rbp)
  426be1:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
  426be5:       48 b8 02 00 00 00 00    movabs $0x2,%rax
  426bec:       00 00 00 
  426bef:       48 89 c1                mov    %rax,%rcx
  426bf2:       49 89 d0                mov    %rdx,%r8
  426bf5:       51                      push   %rcx
  426bf6:       41 50                   push   %r8
  426bf8:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426bff:       00 00 00 
  426c02:       48 bf c0 84 65 00 00    movabs $0x6584c0,%rdi
  426c09:       00 00 00 
  426c0c:       e8 87 ce 00 00          callq  433a98 <_d_arrayliteralTX>
  426c11:       48 89 45 f0             mov    %rax,-0x10(%rbp)
  426c15:       f2 48 0f 10 05 02 81    rex.W movsd 0x28102(%rip),%xmm0
  426c1c:       02 00 
  426c1e:       f2 48 0f 11 00          rex.W movsd %xmm0,(%rax)
  426c23:       f2 48 0f 10 0d f4 80    rex.W movsd 0x280f4(%rip),%xmm1
  426c2a:       02 00 
  426c2c:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  426c30:       f2 48 0f 11 48 08       rex.W movsd %xmm1,0x8(%rax)
  426c36:       48 8b 55 f0             mov    -0x10(%rbp),%rdx
  426c3a:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426c41:       00 00 00 
  426c44:       41 58                   pop    %r8
  426c46:       59                      pop    %rcx
  426c47:       48 bf 08 00 00 00 00    movabs $0x8,%rdi
  426c4e:       00 00 00 
  426c51:       e8 8e 95 00 00          callq  4301e4 <_d_arraycopy>
  426c56:       f2 0f 12 4d b0          movddup -0x50(%rbp),%xmm1
  426c5b:       66 0f 11 4d d0          movupd %xmm1,-0x30(%rbp)
  426c60:       ff 75 c8                pushq  -0x38(%rbp)
  426c63:       ff 75 c0                pushq  -0x40(%rbp)
  426c66:       e8 09 00 00 00          callq  426c74 <_D3std5stdio16__T7writelnTG2dZ7writelnFG2dZv>
  426c6b:       48 83 c4 10             add    $0x10,%rsp
  426c6f:       31 c0                   xor    %eax,%eax
  426c71:       c9                      leaveq 
  426c72:       c3                      retq   
  426c73:       90                      nop
0000000000426b88 <_Dmain>:
  426b88:       55                      push   %rbp
  426b89:       48 8b ec                mov    %rsp,%rbp
  426b8c:       48 83 ec 50             sub    $0x50,%rsp
  426b90:       f2 48 0f 10 05 77 81    rex.W movsd 0x28177(%rip),%xmm0
  426b97:       02 00
  426b99:       f2 48 0f 11 45 b0       rex.W movsd %xmm0,-0x50(%rbp)
  426b9f:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426ba6:       00 00 00
  426ba9:       f2 48 0f 10 05 66 81    rex.W movsd 0x28166(%rip),%xmm0
  426bb0:       02 00
  426bb2:       48 8d 7d c0             lea    -0x40(%rbp),%rdi
  426bb6:       e8 65 d1 00 00          callq  433d20 <_memsetDouble>
  426bbb:       f2 48 0f 10 0d 4c 81    rex.W movsd 0x2814c(%rip),%xmm1
  426bc2:       02 00
  426bc4:       f2 48 0f 11 4d c0       rex.W movsd %xmm1,-0x40(%rbp)
  426bca:       f2 48 0f 10 15 3d 81    rex.W movsd 0x2813d(%rip),%xmm2
  426bd1:       02 00
  426bd3:       f2 48 0f 11 55 c8       rex.W movsd %xmm2,-0x38(%rbp)
  426bd9:       48 8d 45 c0             lea    -0x40(%rbp),%rax
  426bdd:       48 89 45 d0             mov    %rax,-0x30(%rbp)
  426be1:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
  426be5:       48 b8 02 00 00 00 00    movabs $0x2,%rax
  426bec:       00 00 00
  426bef:       48 89 c1                mov    %rax,%rcx
  426bf2:       49 89 d0                mov    %rdx,%r8
  426bf5:       51                      push   %rcx
  426bf6:       41 50                   push   %r8
  426bf8:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426bff:       00 00 00
  426c02:       48 bf c0 84 65 00 00    movabs $0x6584c0,%rdi
  426c09:       00 00 00
  426c0c:       e8 87 ce 00 00          callq  433a98 <_d_arrayliteralTX>
  426c11:       48 89 45 f0             mov    %rax,-0x10(%rbp)
  426c15:       f2 48 0f 10 05 02 81    rex.W movsd 0x28102(%rip),%xmm0
  426c1c:       02 00
  426c1e:       f2 48 0f 11 00          rex.W movsd %xmm0,(%rax)
  426c23:       f2 48 0f 10 0d f4 80    rex.W movsd 0x280f4(%rip),%xmm1
  426c2a:       02 00
  426c2c:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  426c30:       f2 48 0f 11 48 08       rex.W movsd %xmm1,0x8(%rax)
  426c36:       48 8b 55 f0             mov    -0x10(%rbp),%rdx
  426c3a:       48 be 02 00 00 00 00    movabs $0x2,%rsi
  426c41:       00 00 00
  426c44:       41 58                   pop    %r8
  426c46:       59                      pop    %rcx
  426c47:       48 bf 08 00 00 00 00    movabs $0x8,%rdi
  426c4e:       00 00 00
  426c51:       e8 8e 95 00 00          callq  4301e4 <_d_arraycopy>
  426c56:       f2 0f 12 4d b0          movddup -0x50(%rbp),%xmm1
  426c5b:       66 0f 11 4d d0          movupd %xmm1,-0x30(%rbp)
  426c60:       ff 75 c8                pushq  -0x38(%rbp)
  426c63:       ff 75 c0                pushq  -0x40(%rbp)
  426c66:       e8 09 00 00 00          callq  426c74 <_D3std5stdio16__T7writelnTG2dZ7writelnFG2dZv>
  426c6b:       48 83 c4 10             add    $0x10,%rsp
  426c6f:       31 c0                   xor    %eax,%eax
  426c71:       c9                      leaveq
  426c72:       c3                      retq
  426c73:       90                      nop

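我研究了一下,显然编译器认为,你写的 `movupd [aptr], XMM1` 实际上是指 `movupd aptr, XMM1`,也就是把 XMM1 写入变量 `aptr` 本身所在的栈位置,而不是写入它所指向的内存(反汇编中的 `movupd %xmm1,-0x30(%rbp)` 正是如此)。预先将 `aptr` 加载到寄存器中(`mov RAX, aptr; movupd [RAX], XMM1;`)就可以使其正常工作。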

您可能应该提交一份bug报告