Gcc 检测预处理器中的MOVBE可用性?

Gcc 检测预处理器中的MOVBE可用性?,gcc,macros,c-preprocessor,endianness,Gcc,Macros,C Preprocessor,Endianness,我在第6代Skylake上进行测试,cpu具有movbe: $ cat /proc/cpuinfo | grep movbe flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arc

我在第6代Skylake上进行测试,cpu具有
movbe

$ cat /proc/cpuinfo | grep movbe
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36
clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art
arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf tsc_known_freq pni
pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2
x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch
intel_pt tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2
erms invpcid mpx rdseed adx smap clflushopt xsaveopt xsavec xgetbv1 xsaves dtherm
ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp
我没有看到下面的
__MOVBE__
预处理器宏。即使使用
-mmovbe
,它也不会显示

我们使用哪个预处理器宏来确定
movbe
的可用性



下面是几个相关的 Stack Overflow 问题,但它们没有讨论这个主题:


    • 我相信您已经得出结论,对此没有定义。只需将
      -mmovbe
      的输出与
      -mno-movbe
      进行比较,就可以清楚地看出这一点

      一些替代办法:

      • 如果您足够了解如何将-mmovbe添加到命令行,还可以添加自己的define
      • 您或许可以从另一个定义(我首先想到的是Haswell)中推断出它,但这不能处理显式的
        -mmovbe
        情况,也不能处理AMD处理器。如果您决定走这条路,查看gcc源码中讨论每个体系结构支持哪些特性的部分可能会有所帮助
      • 也许您最好的选择是在运行时通过
        cpuid
        检查它。传递eax=1将在ecx的(基于零的)位22中返回此功能

      谢谢你,大卫。问题是,在我尝试汇编代码块之前,我需要确保编译器支持MOVBE。另一个问题是,MOVBE零散地分布在较早的处理器上。AMD Bulldozer v4支持它,一些低端Atom处理器也支持它。因此,像
      __haswell__
      这样的编译时检查可能是不完整的。我也需要运行时检查:)查看gcc/config/i386/driver-i386.c,我们看到gcc做了相反的事情:根据movbe是否可用来猜测cpu。我喜欢这种讽刺。无论如何,为什么“在我尝试汇编代码之前,需要确保编译器支持MOVBE”?因为我没有看到movbe的内置函数,所以我假设您使用的是内联asm?您是担心当前环境的汇编器不支持该指令而导致汇编错误吗?我见过有人通过直接使用指令的实际操作码而不是助记符来避免这种情况。显然,在错误的处理器上执行该指令是不好的,但是既然您正在执行运行时检查…Doh!我收回刚才的话。显然有一个内置函数(原文此处的链接已丢失),至少对于gcc来说是这样。在这种情况下,您不必检查define。只需调用内置函数,如果可以,它将使用movbe,如果不能,则进行模拟。您还没有这样做,这一事实表明这里还有一些我不了解的其他限制。谢谢大卫。是的,还有一些额外的限制。使用内联ASM允许我们绕开一些语言规则,比如对齐和类型双关(type punning)。它可以将128位的字节串视为4个32位整数(x86)或2个64位整数。还有一些宏技巧允许同一组内联ASM在GCC和MSVC下都能汇编。没有宏可以检测MOVBE,这很麻烦。我想我需要退回到检查编译器版本。使用MOVBE可以在关键路径上(从主密钥导出密钥调度)为我们节省4条指令,因此保持代码精简非常重要。手工调优的例程的吞吐量应该比C/C++代码高3到5倍。我猜性能将从大约80个cpb改善到20或30个cpb(这个特定的算法是面向字节的,这严重拖累了性能)。
      $ gcc -march=native -mmovbe -dM -E - </dev/null | sort
      #define __ABM__ 1
      #define __ADX__ 1
      #define __AES__ 1
      #define __amd64 1
      #define __amd64__ 1
      #define __ATOMIC_ACQ_REL 4
      #define __ATOMIC_ACQUIRE 2
      #define __ATOMIC_CONSUME 1
      #define __ATOMIC_HLE_ACQUIRE 65536
      #define __ATOMIC_HLE_RELEASE 131072
      #define __ATOMIC_RELAXED 0
      #define __ATOMIC_RELEASE 3
      #define __ATOMIC_SEQ_CST 5
      #define __AVX__ 1
      #define __AVX2__ 1
      #define __BIGGEST_ALIGNMENT__ 32
      #define __BMI__ 1
      #define __BMI2__ 1
      #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
      #define __CHAR16_TYPE__ short unsigned int
      #define __CHAR32_TYPE__ unsigned int
      #define __CHAR_BIT__ 8
      #define __CLFLUSHOPT__ 1
      #define __code_model_small__ 1
      #define __core_avx2 1
      #define __core_avx2__ 1
      #define __DBL_DECIMAL_DIG__ 17
      #define __DBL_DENORM_MIN__ ((double)4.94065645841246544177e-324L)
      #define __DBL_DIG__ 15
      #define __DBL_EPSILON__ ((double)2.22044604925031308085e-16L)
      #define __DBL_HAS_DENORM__ 1
      #define __DBL_HAS_INFINITY__ 1
      #define __DBL_HAS_QUIET_NAN__ 1
      #define __DBL_MANT_DIG__ 53
      #define __DBL_MAX_10_EXP__ 308
      #define __DBL_MAX__ ((double)1.79769313486231570815e+308L)
      #define __DBL_MAX_EXP__ 1024
      #define __DBL_MIN_10_EXP__ (-307)
      #define __DBL_MIN__ ((double)2.22507385850720138309e-308L)
      #define __DBL_MIN_EXP__ (-1021)
      #define __DEC128_EPSILON__ 1E-33DL
      #define __DEC128_MANT_DIG__ 34
      #define __DEC128_MAX__ 9.999999999999999999999999999999999E6144DL
      #define __DEC128_MAX_EXP__ 6145
      #define __DEC128_MIN__ 1E-6143DL
      #define __DEC128_MIN_EXP__ (-6142)
      #define __DEC128_SUBNORMAL_MIN__ 0.000000000000000000000000000000001E-6143DL
      #define __DEC32_EPSILON__ 1E-6DF
      #define __DEC32_MANT_DIG__ 7
      #define __DEC32_MAX__ 9.999999E96DF
      #define __DEC32_MAX_EXP__ 97
      #define __DEC32_MIN__ 1E-95DF
      #define __DEC32_MIN_EXP__ (-94)
      #define __DEC32_SUBNORMAL_MIN__ 0.000001E-95DF
      #define __DEC64_EPSILON__ 1E-15DD
      #define __DEC64_MANT_DIG__ 16
      #define __DEC64_MAX__ 9.999999999999999E384DD
      #define __DEC64_MAX_EXP__ 385
      #define __DEC64_MIN__ 1E-383DD
      #define __DEC64_MIN_EXP__ (-382)
      #define __DEC64_SUBNORMAL_MIN__ 0.000000000000001E-383DD
      #define __DEC_EVAL_METHOD__ 2
      #define __DECIMAL_BID_FORMAT__ 1
      #define __DECIMAL_DIG__ 21
      #define __ELF__ 1
      #define __F16C__ 1
      #define __FINITE_MATH_ONLY__ 0
      #define __FLOAT_WORD_ORDER__ __ORDER_LITTLE_ENDIAN__
      #define __FLT_DECIMAL_DIG__ 9
      #define __FLT_DENORM_MIN__ 1.40129846432481707092e-45F
      #define __FLT_DIG__ 6
      #define __FLT_EPSILON__ 1.19209289550781250000e-7F
      #define __FLT_EVAL_METHOD__ 0
      #define __FLT_HAS_DENORM__ 1
      #define __FLT_HAS_INFINITY__ 1
      #define __FLT_HAS_QUIET_NAN__ 1
      #define __FLT_MANT_DIG__ 24
      #define __FLT_MAX_10_EXP__ 38
      #define __FLT_MAX__ 3.40282346638528859812e+38F
      #define __FLT_MAX_EXP__ 128
      #define __FLT_MIN_10_EXP__ (-37)
      #define __FLT_MIN__ 1.17549435082228750797e-38F
      #define __FLT_MIN_EXP__ (-125)
      #define __FLT_RADIX__ 2
      #define __FMA__ 1
      #define __FP_FAST_FMA 1
      #define __FP_FAST_FMAF 1
      #define __FSGSBASE__ 1
      #define __FXSR__ 1
      #define __GCC_ASM_FLAG_OUTPUTS__ 1
      #define __GCC_ATOMIC_BOOL_LOCK_FREE 2
      #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
      #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
      #define __GCC_ATOMIC_CHAR_LOCK_FREE 2
      #define __GCC_ATOMIC_INT_LOCK_FREE 2
      #define __GCC_ATOMIC_LLONG_LOCK_FREE 2
      #define __GCC_ATOMIC_LONG_LOCK_FREE 2
      #define __GCC_ATOMIC_POINTER_LOCK_FREE 2
      #define __GCC_ATOMIC_SHORT_LOCK_FREE 2
      #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
      #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
      #define __GCC_HAVE_DWARF2_CFI_ASM 1
      #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
      #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1
      #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
      #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
      #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
      #define __GCC_IEC_559 2
      #define __GCC_IEC_559_COMPLEX 2
      #define __GNUC__ 6
      #define __GNUC_MINOR__ 3
      #define __GNUC_PATCHLEVEL__ 1
      #define __GNUC_RH_RELEASE__ 1
      #define __GNUC_STDC_INLINE__ 1
      #define __gnu_linux__ 1
      #define __GXX_ABI_VERSION 1010
      #define __has_include_next(STR) __has_include_next__(STR)
      #define __has_include(STR) __has_include__(STR)
      #define __haswell 1
      #define __haswell__ 1
      #define __INT16_C(c) c
      #define __INT16_MAX__ 0x7fff
      #define __INT16_TYPE__ short int
      #define __INT32_C(c) c
      #define __INT32_MAX__ 0x7fffffff
      #define __INT32_TYPE__ int
      #define __INT64_C(c) c ## L
      #define __INT64_MAX__ 0x7fffffffffffffffL
      #define __INT64_TYPE__ long int
      #define __INT8_C(c) c
      #define __INT8_MAX__ 0x7f
      #define __INT8_TYPE__ signed char
      #define __INT_FAST16_MAX__ 0x7fffffffffffffffL
      #define __INT_FAST16_TYPE__ long int
      #define __INT_FAST32_MAX__ 0x7fffffffffffffffL
      #define __INT_FAST32_TYPE__ long int
      #define __INT_FAST64_MAX__ 0x7fffffffffffffffL
      #define __INT_FAST64_TYPE__ long int
      #define __INT_FAST8_MAX__ 0x7f
      #define __INT_FAST8_TYPE__ signed char
      #define __INT_LEAST16_MAX__ 0x7fff
      #define __INT_LEAST16_TYPE__ short int
      #define __INT_LEAST32_MAX__ 0x7fffffff
      #define __INT_LEAST32_TYPE__ int
      #define __INT_LEAST64_MAX__ 0x7fffffffffffffffL
      #define __INT_LEAST64_TYPE__ long int
      #define __INT_LEAST8_MAX__ 0x7f
      #define __INT_LEAST8_TYPE__ signed char
      #define __INT_MAX__ 0x7fffffff
      #define __INTMAX_C(c) c ## L
      #define __INTMAX_MAX__ 0x7fffffffffffffffL
      #define __INTMAX_TYPE__ long int
      #define __INTPTR_MAX__ 0x7fffffffffffffffL
      #define __INTPTR_TYPE__ long int
      #define __LDBL_DENORM_MIN__ 3.64519953188247460253e-4951L
      #define __LDBL_DIG__ 18
      #define __LDBL_EPSILON__ 1.08420217248550443401e-19L
      #define __LDBL_HAS_DENORM__ 1
      #define __LDBL_HAS_INFINITY__ 1
      #define __LDBL_HAS_QUIET_NAN__ 1
      #define __LDBL_MANT_DIG__ 64
      #define __LDBL_MAX_10_EXP__ 4932
      #define __LDBL_MAX__ 1.18973149535723176502e+4932L
      #define __LDBL_MAX_EXP__ 16384
      #define __LDBL_MIN_10_EXP__ (-4931)
      #define __LDBL_MIN__ 3.36210314311209350626e-4932L
      #define __LDBL_MIN_EXP__ (-16381)
      #define __linux 1
      #define __linux__ 1
      #define linux 1
      #define __LONG_LONG_MAX__ 0x7fffffffffffffffLL
      #define __LONG_MAX__ 0x7fffffffffffffffL
      #define __LP64__ 1
      #define _LP64 1
      #define __LZCNT__ 1
      #define __MMX__ 1
      #define __NO_INLINE__ 1
      #define __ORDER_BIG_ENDIAN__ 4321
      #define __ORDER_LITTLE_ENDIAN__ 1234
      #define __ORDER_PDP_ENDIAN__ 3412
      #define __PCLMUL__ 1
      #define __POPCNT__ 1
      #define __PRAGMA_REDEFINE_EXTNAME 1
      #define __PRFCHW__ 1
      #define __PTRDIFF_MAX__ 0x7fffffffffffffffL
      #define __PTRDIFF_TYPE__ long int
      #define __RDRND__ 1
      #define __RDSEED__ 1
      #define __REGISTER_PREFIX__
      #define __SCHAR_MAX__ 0x7f
      #define __SEG_FS 1
      #define __SEG_GS 1
      #define __SHRT_MAX__ 0x7fff
      #define __SIG_ATOMIC_MAX__ 0x7fffffff
      #define __SIG_ATOMIC_MIN__ (-__SIG_ATOMIC_MAX__ - 1)
      #define __SIG_ATOMIC_TYPE__ int
      #define __SIZE_MAX__ 0xffffffffffffffffUL
      #define __SIZEOF_DOUBLE__ 8
      #define __SIZEOF_FLOAT128__ 16
      #define __SIZEOF_FLOAT__ 4
      #define __SIZEOF_FLOAT80__ 16
      #define __SIZEOF_INT128__ 16
      #define __SIZEOF_INT__ 4
      #define __SIZEOF_LONG__ 8
      #define __SIZEOF_LONG_DOUBLE__ 16
      #define __SIZEOF_LONG_LONG__ 8
      #define __SIZEOF_POINTER__ 8
      #define __SIZEOF_PTRDIFF_T__ 8
      #define __SIZEOF_SHORT__ 2
      #define __SIZEOF_SIZE_T__ 8
      #define __SIZEOF_WCHAR_T__ 4
      #define __SIZEOF_WINT_T__ 4
      #define __SIZE_TYPE__ long unsigned int
      #define __SSE__ 1
      #define __SSE2__ 1
      #define __SSE2_MATH__ 1
      #define __SSE3__ 1
      #define __SSE4_1__ 1
      #define __SSE4_2__ 1
      #define __SSE_MATH__ 1
      #define __SSSE3__ 1
      #define __STDC__ 1
      #define __STDC_HOSTED__ 1
      #define __STDC_IEC_559__ 1
      #define __STDC_IEC_559_COMPLEX__ 1
      #define __STDC_ISO_10646__ 201605L
      #define __STDC_NO_THREADS__ 1
      #define _STDC_PREDEF_H 1
      #define __STDC_UTF_16__ 1
      #define __STDC_UTF_32__ 1
      #define __STDC_VERSION__ 201112L
      #define __tune_core_avx2__ 1
      #define __tune_haswell__ 1
      #define __UINT16_C(c) c
      #define __UINT16_MAX__ 0xffff
      #define __UINT16_TYPE__ short unsigned int
      #define __UINT32_C(c) c ## U
      #define __UINT32_MAX__ 0xffffffffU
      #define __UINT32_TYPE__ unsigned int
      #define __UINT64_C(c) c ## UL
      #define __UINT64_MAX__ 0xffffffffffffffffUL
      #define __UINT64_TYPE__ long unsigned int
      #define __UINT8_C(c) c
      #define __UINT8_MAX__ 0xff
      #define __UINT8_TYPE__ unsigned char
      #define __UINT_FAST16_MAX__ 0xffffffffffffffffUL
      #define __UINT_FAST16_TYPE__ long unsigned int
      #define __UINT_FAST32_MAX__ 0xffffffffffffffffUL
      #define __UINT_FAST32_TYPE__ long unsigned int
      #define __UINT_FAST64_MAX__ 0xffffffffffffffffUL
      #define __UINT_FAST64_TYPE__ long unsigned int
      #define __UINT_FAST8_MAX__ 0xff
      #define __UINT_FAST8_TYPE__ unsigned char
      #define __UINT_LEAST16_MAX__ 0xffff
      #define __UINT_LEAST16_TYPE__ short unsigned int
      #define __UINT_LEAST32_MAX__ 0xffffffffU
      #define __UINT_LEAST32_TYPE__ unsigned int
      #define __UINT_LEAST64_MAX__ 0xffffffffffffffffUL
      #define __UINT_LEAST64_TYPE__ long unsigned int
      #define __UINT_LEAST8_MAX__ 0xff
      #define __UINT_LEAST8_TYPE__ unsigned char
      #define __UINTMAX_C(c) c ## UL
      #define __UINTMAX_MAX__ 0xffffffffffffffffUL
      #define __UINTMAX_TYPE__ long unsigned int
      #define __UINTPTR_MAX__ 0xffffffffffffffffUL
      #define __UINTPTR_TYPE__ long unsigned int
      #define __unix 1
      #define __unix__ 1
      #define unix 1
      #define __USER_LABEL_PREFIX__
      #define __VERSION__ "6.3.1 20161221 (Red Hat 6.3.1-1)"
      #define __WCHAR_MAX__ 0x7fffffff
      #define __WCHAR_MIN__ (-__WCHAR_MAX__ - 1)
      #define __WCHAR_TYPE__ int
      #define __WINT_MAX__ 0xffffffffU
      #define __WINT_MIN__ 0U
      #define __WINT_TYPE__ unsigned int
      #define __x86_64 1
      #define __x86_64__ 1
      #define __XSAVE__ 1
      #define __XSAVEC__ 1
      #define __XSAVEOPT__ 1
      #define __XSAVES__ 1
      
      skylake:~$ gcc -v
      Using built-in specs.
      COLLECT_GCC=gcc
      COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/6.3.1/lto-wrapper
      Target: x86_64-redhat-linux
      Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,objc,obj-c++,fortran,ada,go,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --disable-libgcj --with-isl --enable-libmpx --enable-gnu-indirect-function --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux
      Thread model: posix
      gcc version 6.3.1 20161221 (Red Hat 6.3.1-1) (GCC)