C++:启用 C++11/14 后 std::vector<uint8_t> 的性能问题
标签:c++, c++11, gcc, c++14, stdvector
我使用 gcc 4.9 和 Linaro 工具链为 ARM 进行交叉编译,发现添加 -std=c++14 时,vector.assign() 的编译结果发生了变化,从而造成了严重的性能问题。
我已经尝试了几种不同的方法来完成这个“分配内存+复制”的操作,但只要使用 std::vector 来完成,它们都有这个性能问题。
我可以用这个玩具例子重现这个问题:
VectorTest.h
#include <stdint.h>
#include <stddef.h>
#include <vector>
// Toy wrapper used to reproduce the issue: it owns a byte vector that is
// filled from a caller-supplied buffer.
struct VectorWrapper_t
{
    // Builds the wrapper from `length` bytes starting at `pData`.
    // Only declared here; the definition lives in a separate translation unit.
    VectorWrapper_t(const uint8_t* pData, size_t length);

    // Storage for the wrapped bytes.
    std::vector<uint8_t> data;
};
gcc标志:
-std=c++14 \
-mthumb -march=armv7-a -mtune=cortex-a9 \
-mlittle-endian -mfloat-abi=hard -mfpu=neon -Wa,-mimplicit-it=thumb \
-O2 -g
查看汇编代码,我可以看出原因:原始版本(我假设是 C++03?)调用 memmove,而 C++14 版本添加了一个额外的循环,看起来像是在手动复制数据。根据 gcc 添加的 .loc 标记和 -fverbose-asm 注释,此循环中的指令来自 stl_construct.h 和 stl_uninitialized.h。
改用 gcc 5.2.1(使用 C++14)后,除了使用 memcpy 而不是 memmove 之外,编译结果与 C++03 示例几乎相同。
我可以通过使用 std::unique_ptr 而不是 vector 来解决这个问题。但是,我想弄清这个问题的根源,以了解其他使用 vector 的地方是否也存在性能问题,以及可以如何解决这些问题(升级到 gcc 5.2 是不切实际的)。
所以我的问题是:为什么它在 C++11/14 下的编译结果不同?
作为参考,gcc --version 报告:arm-linux-gnueabihf-gcc (Linaro GCC 4.9-2014.12) 4.9.3 20141205 (prerelease)
以下是生成的汇编代码:
# C++03, gcc 4.9
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build the payload is copied with one call to memmove.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this/length in r4/r5.
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L19; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
# Allocate `length` bytes with _Znwj (mangled operator new(unsigned int)),
# keep the new buffer in r7, then call memmove(buffer, pData, length).
mov r0, r2 @, length
mov r6, r1 @ pData, pData
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.13516,
bl memmove @
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
cbz r0, .L3 @ D.13515,
bl _ZdlPv @
.L3:
# Publish the new buffer: _M_start = buffer, and both _M_finish and
# _M_end_of_storage = buffer + length. Return this in r0.
add r5, r5, r7 @ D.13515, D.13516
str r7, [r4] @ D.13516, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
str r5, [r4, #4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r4] @ D.13515, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.13515,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @
# C++14, gcc 4.9
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build there is no memmove/memcpy call: the payload is copied by the
# byte-at-a-time loop at .L7.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this/length in r6/r5.
movs r3, #0 @ tmp157,
mov r6, r0 @ this, this
str r3, [r0] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L25; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L25 @ length,
mov r0, r6 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L25:
# Allocate `length` bytes with _Znwj (mangled operator new(unsigned int));
# the new buffer is kept in r7.
mov r0, r2 @, length
mov r4, r1 @ pData, pData
bl _Znwj @
# Compare pData with pData + length. When they differ, the IT block sets up
# the loop: r1 = pData - 1 (the pre-indexed load below then starts at pData),
# r3 = destination cursor, r4 = destination end (buffer + length).
# When they are equal, branch to .L26 and skip the loop.
adds r3, r4, r5 @ D.20345, pData, length
mov r7, r0 @ __result,
cmp r4, r3 @ pData, D.20345
ittt ne
addne r1, r4, #-1 @ ivtmp.76, pData,
movne r3, r0 @ __result, __result
addne r4, r0, r5 @ D.20346, __result, length
beq .L26 @,
.L7:
# Copy loop, one byte per iteration: load the next source byte with
# pre-increment, skip the store when the destination pointer r3 is zero,
# advance r3, and repeat until r3 reaches r4.
# The zero test on every byte is presumably the null check of the placement
# new used to construct each element (TODO confirm in stl_construct.h).
ldrb r2, [r1, #1]! @ zero_extendqisi2 @ D.20348, MEM[base: _48, offset: 0]
cbz r3, .L6 @ __result,
strb r2, [r3] @ D.20348, MEM[base: __result_23, offset: 0B]
.L6:
adds r3, r3, #1 @ __result, __result,
cmp r3, r4 @ __result, D.20346
bne .L7 @,
.L8:
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r6] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
cbz r0, .L5 @ D.20346,
bl _ZdlPv @
.L5:
# Publish the new buffer: _M_start = buffer (r7), and both _M_finish and
# _M_end_of_storage = buffer + length (r4). Return this in r0.
str r7, [r6] @ __result, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
mov r0, r6 @, this
str r4, [r6, #4] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_finish
str r4, [r6, #8] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L26:
# Loop skipped: still compute r4 = buffer + length, then rejoin at .L8.
adds r4, r0, r5 @ D.20346, __result, length
b .L8 @
.L11:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r6] @ D.20346, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L10 @ D.20346,
bl _ZdlPv @
.L10:
bl __cxa_end_cleanup @
# C++14, gcc 5.2
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build the payload is copied with one call to memcpy.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this in r4.
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L19; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
# Save pData/length in r6/r5, allocate `length` bytes with _Znwj (mangled
# operator new(unsigned int)), keep the new buffer in r7, then call
# memcpy(buffer, pData, length).
mov r0, r2 @, length
mov r6, r1 @ pData, pData
mov r5, r2 @ length, length
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.20824,
bl memcpy @
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
cbz r0, .L3 @ D.20823,
bl _ZdlPv @
.L3:
# Publish the new buffer: _M_start = buffer, and both _M_finish and
# _M_end_of_storage = buffer + length. Return this in r0.
add r5, r5, r7 @ D.20823, D.20824
str r7, [r4] @ D.20824, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
str r5, [r4, #4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r4] @ D.20823, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.20823,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @
这是 4.9.2 版本中的一个 GCC 错误(参见相应的 GCC 错误报告;原文此处的链接已丢失)。默认的 -std=gnu++03 模式和 -std=c++14 之间的区别在于:在 C++11 及更高版本中,可能存在不可赋值的平凡类型(因为它们的赋值运算符可能已被删除),这会导致 std::uninitialized_copy 的实现走另一条(较慢的)代码路径。对可赋值性的检查写错了,这意味着我们在不需要的时候也走了慢路径。
我在两年前就为 GCC 4.9.3 修复了它,但是您的编译器基于 4.9.2 和 4.9.3 两个版本之间的一个快照,这个快照早了几周,因此不包含该修复。
您可以要求 Linaro 将其 GCC 4.9 编译器更新到 4.9.4,或者至少应用修复此错误的补丁。
这可能不会有太大区别,但为什么要先赋值(assign)而不是直接初始化呢?直接初始化可以省去一次赋值:
// In-class constructor definition: initializes `data` directly from the range
// [pData, pData + length) in the member-initializer list; the body is empty.
VectorWrapper_t(uint8_t const*pData,size_t length):data(pData,pData+length){}
这个问题似乎更适合作为 Linaro 的 bug 报告。
有两种可能:要么代码起初是相同的,但 gcc-5+ 检测到该循环等价于 memcpy,并将其替换为对 memcpy 的调用;要么 libstdc++ 新增了某条调用 memcpy 的特殊路径。可以尝试传递 -fdump-tree-all,查看一些早期/晚期的转储,以确定发生了什么。
我手头没有任何一版标准,但 assign 的规范在这些标准之间是否发生了变化?如果没有,我会称之为回归 bug。
@AndreiR. memmove 中的 move 与 C++11 的移动语义无关。这两个函数都不会修改源数组。
# C++03, gcc 4.9
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build the payload is copied with one call to memmove.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this/length in r4/r5.
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L19; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
# Allocate `length` bytes with _Znwj (mangled operator new(unsigned int)),
# keep the new buffer in r7, then call memmove(buffer, pData, length).
mov r0, r2 @, length
mov r6, r1 @ pData, pData
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.13516,
bl memmove @
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
cbz r0, .L3 @ D.13515,
bl _ZdlPv @
.L3:
# Publish the new buffer: _M_start = buffer, and both _M_finish and
# _M_end_of_storage = buffer + length. Return this in r0.
add r5, r5, r7 @ D.13515, D.13516
str r7, [r4] @ D.13516, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_start
str r5, [r4, #4] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.13515, MEM[(struct vector *)this_1(D)].D.11902._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r4] @ D.13515, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.13515,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @
# C++14, gcc 4.9
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build there is no memmove/memcpy call: the payload is copied by the
# byte-at-a-time loop at .L7.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this/length in r6/r5.
movs r3, #0 @ tmp157,
mov r6, r0 @ this, this
str r3, [r0] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_start
mov r5, r2 @ length, length
str r3, [r0, #4] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp157, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L25; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L25 @ length,
mov r0, r6 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L25:
# Allocate `length` bytes with _Znwj (mangled operator new(unsigned int));
# the new buffer is kept in r7.
mov r0, r2 @, length
mov r4, r1 @ pData, pData
bl _Znwj @
# Compare pData with pData + length. When they differ, the IT block sets up
# the loop: r1 = pData - 1 (the pre-indexed load below then starts at pData),
# r3 = destination cursor, r4 = destination end (buffer + length).
# When they are equal, branch to .L26 and skip the loop.
adds r3, r4, r5 @ D.20345, pData, length
mov r7, r0 @ __result,
cmp r4, r3 @ pData, D.20345
ittt ne
addne r1, r4, #-1 @ ivtmp.76, pData,
movne r3, r0 @ __result, __result
addne r4, r0, r5 @ D.20346, __result, length
beq .L26 @,
.L7:
# Copy loop, one byte per iteration: load the next source byte with
# pre-increment, skip the store when the destination pointer r3 is zero,
# advance r3, and repeat until r3 reaches r4.
# The zero test on every byte is presumably the null check of the placement
# new used to construct each element (TODO confirm in stl_construct.h).
ldrb r2, [r1, #1]! @ zero_extendqisi2 @ D.20348, MEM[base: _48, offset: 0]
cbz r3, .L6 @ __result,
strb r2, [r3] @ D.20348, MEM[base: __result_23, offset: 0B]
.L6:
adds r3, r3, #1 @ __result, __result,
cmp r3, r4 @ __result, D.20346
bne .L7 @,
.L8:
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r6] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
cbz r0, .L5 @ D.20346,
bl _ZdlPv @
.L5:
# Publish the new buffer: _M_start = buffer (r7), and both _M_finish and
# _M_end_of_storage = buffer + length (r4). Return this in r0.
str r7, [r6] @ __result, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_start
mov r0, r6 @, this
str r4, [r6, #4] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_finish
str r4, [r6, #8] @ D.20346, MEM[(struct vector *)this_1(D)].D.18218._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L26:
# Loop skipped: still compute r4 = buffer + length, then rejoin at .L8.
adds r4, r0, r5 @ D.20346, __result, length
b .L8 @
.L11:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r6] @ D.20346, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L10 @ D.20346,
bl _ZdlPv @
.L10:
bl __cxa_end_cleanup @
# C++14, gcc 5.2
# ARM Thumb-2 compiler output (GNU as syntax, -fverbose-asm notes after '@').
# Presumably the VectorWrapper_t constructor of the toy example; the function
# label is not part of the listing.
# Per the compiler notes below: r0 = this, r1 = pData, r2 = length.
# In this build the payload is copied with one call to memcpy.
push {r3, r4, r5, r6, r7, lr} @
# Zero the vector's three pointers and keep this in r4.
movs r3, #0 @ tmp118,
mov r4, r0 @ this, this
str r3, [r0] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_start
str r3, [r0, #4] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_finish
str r3, [r0, #8] @ tmp118, MEM[(struct _Vector_impl *)this_1(D)]._M_end_of_storage
# Non-zero length takes the copy path at .L19; otherwise fall through and
# return this in r0 with the pointers left at zero.
cbnz r2, .L19 @ length,
mov r0, r4 @, this
pop {r3, r4, r5, r6, r7, pc} @
.L19:
# Save pData/length in r6/r5, allocate `length` bytes with _Znwj (mangled
# operator new(unsigned int)), keep the new buffer in r7, then call
# memcpy(buffer, pData, length).
mov r0, r2 @, length
mov r6, r1 @ pData, pData
mov r5, r2 @ length, length
bl _Znwj @
mov r2, r5 @, length
mov r1, r6 @, pData
mov r7, r0 @ D.20824,
bl memcpy @
# Reload _M_start and pass it to _ZdlPv (mangled operator delete(void*))
# unless it is null.
ldr r0, [r4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
cbz r0, .L3 @ D.20823,
bl _ZdlPv @
.L3:
# Publish the new buffer: _M_start = buffer, and both _M_finish and
# _M_end_of_storage = buffer + length. Return this in r0.
add r5, r5, r7 @ D.20823, D.20824
str r7, [r4] @ D.20824, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_start
str r5, [r4, #4] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_finish
mov r0, r4 @, this
str r5, [r4, #8] @ D.20823, MEM[(struct vector *)this_1(D)].D.18751._M_impl._M_end_of_storage
pop {r3, r4, r5, r6, r7, pc} @
.L6:
# No instruction in this listing branches here; presumably the exception
# cleanup path (TODO confirm against the unwind tables). It deletes _M_start
# if non-null and then calls __cxa_end_cleanup.
ldr r0, [r4] @ D.20823, MEM[(struct _Vector_base *)this_1(D)]._M_impl._M_start
cbz r0, .L5 @ D.20823,
bl _ZdlPv @
.L5:
bl __cxa_end_cleanup @