C++ 为什么在这个反汇编的 std::string 析构函数中有一个带 lock 前缀的 xadd 指令?

C++ 为什么在这个反汇编的 std::string 析构函数中有一个带 lock 前缀的 xadd 指令?,c++,gcc,assembly,x86-64,atomic,C++,Gcc,Assembly,X86 64,Atomic,我有一段非常简单的代码: #include <string> #include <iostream> int main() { std::string s("abc"); std::cout << s; } 然后对其进行反汇编,最有趣的部分是: 00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>: 4009a0: 48 81 ff a0 11 60 00


#include <string>
#include <iostream>

int main() {
    std::string s("abc");
    std::cout << s;

00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>:
  4009a0:       48 81 ff a0 11 60 00    cmp    rdi,0x6011a0
  4009a7:       75 01                   jne    4009aa <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0xa>
  4009a9:       c3                      ret    
  4009aa:       b8 00 00 00 00          mov    eax,0x0
  4009af:       48 85 c0                test   rax,rax
  4009b2:       74 11                   je     4009c5 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x25>
  4009b4:       83 c8 ff                or     eax,0xffffffff
  4009b7:       f0 0f c1 47 10          lock xadd DWORD PTR [rdi+0x10],eax
  4009bc:       85 c0                   test   eax,eax
  4009be:       7f e9                   jg     4009a9 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x9>
  4009c0:       e9 cb fd ff ff          jmp    400790 <_ZdlPv@plt>
  4009c5:       8b 47 10                mov    eax,DWORD PTR [rdi+0x10]
  4009c8:       8d 50 ff                lea    edx,[rax-0x1]
  4009cb:       89 57 10                mov    DWORD PTR [rdi+0x10],edx
  4009ce:       eb ec                   jmp    4009bc <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x1c>
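00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>:
4009a0: 48 81 ff a0 11 60 00 cmp rdi,0x6011a0
4009a7: 75 01 jne 4009aa <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0xa>
4009a9: c3 ret
4009aa: b8 00 00 00 00 mov eax,0x0
4009af: 48 85 c0 test rax,rax
4009b2: 74 11 je 4009c5 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x25>
4009b4: 83 c8 ff or eax,0xffffffff
4009b7: f0 0f c1 47 10 lock xadd DWORD PTR [rdi+0x10],eax
4009bc: 85 c0 test eax,eax
4009be: 7f e9 jg 4009a9 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x9>
4009c0: e9 cb fd ff ff jmp 400790 <_ZdlPv@plt>
4009c5: 8b 47 10 mov eax,DWORD PTR [rdi+0x10]
4009c8: 8d 50 ff lea edx,[rax-0x1]
4009cb: 89 57 10 mov DWORD PTR [rdi+0x10],edx
4009ce: eb ec jmp 4009bc <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x1c>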
_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10
std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_M_dispose(std::allocator<char> const&) [clone .isra.10]




_ZNSs4_Rep10_M_disposeERKSaIcE
反修饰(de-mangle)[1] 后为
std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_M_dispose(std::allocator<char> const&)
。以下是 gcc-4.x 时代[2] 的 libstdc++ 中该函数的主要代码:

if (__builtin_expect(this != &_S_empty_rep(), false))
  {
    _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&this->_M_refcount);
    if (__gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount,
                                               -1) <= 0)
      {
        _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&this->_M_refcount);
        _M_destroy(__a);
      }
  }


00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>:

  # the next two lines implement the check:
  # if (__builtin_expect(this != &_S_empty_rep(), false))
  # which is an empty string optimization. The S_empty_rep singleton
  # is at address 0x6011a0 and if the current buffer points to that
  # we are done (execute the ret)
  4009a0: cmp    rdi,0x6011a0
  4009a7: jne    4009aa <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0xa>
  4009a9: ret

  # now we are in the implementation of
  # __gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount, -1)
  # which dispatches either to an atomic version of the add function
  # or the non-atomic version, depending on the value of `eax` which
  # is always directly set to zero, so the non-atomic version is 
  # *always called* (see details below)
  4009aa: mov    eax,0x0
  4009af: test   rax,rax
  4009b2: je     4009c5 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x25>

  # this is the atomic version of the decrement you were concerned about
  # but we never execute this code because the test above always jumps
  # to 4009c5 (the non-atomic version)
  4009b4: or     eax,0xffffffff
  4009b7: lock xadd DWORD PTR [rdi+0x10],eax
  4009bc: test   eax,eax
  # eax holds the refcount as it was before the decrement; if it was > 0
  # skip the delete and return (this is the `<= 0` test in the C++ source)
  4009be: jg     4009a9 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x9>
  # the delete call
  4009c0: jmp    400790 <_ZdlPv@plt> # tailcall

  # the non-atomic version starts here, this is the code that is 
  # always executed
  4009c5: mov    eax,DWORD PTR [rdi+0x10]
  4009c8: lea    edx,[rax-0x1]
  4009cb: mov    DWORD PTR [rdi+0x10],edx
  # this jumps up to the test eax,eax check, which tail-calls operator
  # delete if the refcount was <= 0 before the decrement
  4009ce: jmp    4009bc <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x1c>
00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>:
# if (__builtin_expect(this != &_S_empty_rep(), false))
# 这是一个空字符串优化。S_empty_rep 单例
4009a0: cmp rdi,0x6011a0
4009a7: jne 4009aa
# __gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount, -1)
4009aa: mov eax,0x0
4009b2: je 4009c5
4009b7: lock xadd DWORD PTR [rdi+0x10],eax
4009be: jg 4009a9
4009c0: jmp 400790 # tailcall
4009c5: mov eax,DWORD PTR [rdi+0x10]
4009c8: lea edx,[rax-0x1]
4009cb: mov DWORD PTR [rdi+0x10],edx
4009ce: jmp 4009bc
lock xadd

__gnu_cxx::__atomic_add_dispatch
__pthread_key_create
mov eax,0x0

__exchange_and_add_dispatch
00000000004009a0 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10>:

  # the next two lines implement the check:
  # if (__builtin_expect(this != &_S_empty_rep(), false))
  # which is an empty string optimization. The S_empty_rep singleton
  # is at address 0x6011a0 and if the current buffer points to that
  # we are done (execute the ret)
  4009a0: cmp    rdi,0x6011a0
  4009a7: jne    4009aa <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0xa>
  4009a9: ret

  # now we are in the implementation of
  # __gnu_cxx::__exchange_and_add_dispatch(&this->_M_refcount, -1)
  # which dispatches either to an atomic version of the add function
  # or the non-atomic version, depending on the value of `eax` which
  # is always directly set to zero, so the non-atomic version is 
  # *always called* (see details below)
  4009aa: mov    eax,0x0
  4009af: test   rax,rax
  4009b2: je     4009c5 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x25>

  # this is the atomic version of the decrement you were concerned about
  # but we never execute this code because the test above always jumps
  # to 4009c5 (the non-atomic version)
  4009b4: or     eax,0xffffffff
  4009b7: lock xadd DWORD PTR [rdi+0x10],eax
  4009bc: test   eax,eax
  # eax holds the refcount as it was before the decrement; if it was > 0
  # skip the delete and return (this is the `<= 0` test in the C++ source)
  4009be: jg     4009a9 <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x9>
  # the delete call
  4009c0: jmp    400790 <_ZdlPv@plt> # tailcall

  # the non-atomic version starts here, this is the code that is 
  # always executed
  4009c5: mov    eax,DWORD PTR [rdi+0x10]
  4009c8: lea    edx,[rax-0x1]
  4009cb: mov    DWORD PTR [rdi+0x10],edx
  # this jumps up to the test eax,eax check, which tail-calls operator
  # delete if the refcount was <= 0 before the decrement
  4009ce: jmp    4009bc <_ZNSs4_Rep10_M_disposeERKSaIcE.isra.10+0x1c>