C++ 在CMakeLists.txt文件中启用优化标志
这是我第一次在这里问问题 预备役 我已经编写了一些代码来比较过滤器(如Bloom filter等)。 代码是可用的 在执行各种基准测试时,我注意到在标志之间切换C++ 在CMakeLists.txt文件中启用优化标志,c++,ubuntu,cmake,clion,C++,Ubuntu,Cmake,Clion,这是我第一次在这里问问题 预备役 我已经编写了一些代码来比较过滤器(如Bloom filter等)。 代码是可用的 在执行各种基准测试时,我注意到在标志之间切换 SET(CMAKE_CXX_FLAGS "-O0 -mavx2 -mbmi2") 或 大幅更改基准测试(某些测试快1000倍) 我试图确定哪个(或多个)特定标志导致了这种速度加快 问题陈述 我尝试手动添加标志,例如编写: SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -f
SET(CMAKE_CXX_FLAGS "-O0 -mavx2 -mbmi2")
或
大幅更改基准测试(某些测试快1000倍)
我试图确定哪个(或多个)特定标志导致了这种速度加快
问题陈述
我尝试手动添加标志,例如编写:
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -finline-functions -funswitch-loops -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -fipa-cp-clone")
虽然我添加了所有提到的标志,但它并没有导致这样的加速
我是否正确启用了优化标志
感谢阅读,欢迎任何帮助
完整的CMakeLists.txt文件:
cmake_minimum_required(VERSION 3.14)
project(Filters)
set(CMAKE_CXX_STANDARD 14)
#SET(CMAKE_CXX_FLAGS "-O0 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O1 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O2 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O3 -mavx2 -mbmi2")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi2 -fauto-inc-dec -fcompare-elim -fcprop-registers -fdce -fdefer-pop -fdelayed-branch -fdse -fguess-branch-probability -fif-conversion2 -fif-conversion -fipa-pure-const -fipa-profile -fipa-reference -fmerge-constants -fsplit-wide-types -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre -ftree-phiprop -ftree-sra -ftree-pta -ftree-ter -funit-at-a-time -DNDEBUG -fthread-jumps -falign-functions -falign-jumps -falign-loops -falign-labels -fcaller-saves -fcrossjumping -fcse-follow-jumps -fcse-skip-blocks -fdelete-null-pointer-checks -fdevirtualize -fexpensive-optimizations -fgcse -fgcse-lm -finline-small-functions -findirect-inlining -fipa-sra -foptimize-sibling-calls -fpartial-inlining -fpeephole2 -fregmove -freorder-blocks -freorder-functions -frerun-cse-after-loop -fsched-interblock -fsched-spec -fschedule-insns -fschedule-insns2 -fstrict-aliasing -fstrict-overflow -ftree-switch-conversion -ftree-pre -ftree-vrp")
#include_directories(.)
include_directories(Bloom_Filter)
include_directories(PD_Filter)
include_directories(Tests)
include_directories(morton)
include_directories(xorfilter)
include_directories(cuckoofilter)
find_package(OpenSSL REQUIRED)
add_executable(
Filters
hashutil.h
Tests/wrappers.hpp
Tests/tests.hpp Tests/tests.cpp
Tests/new_tests.hpp Tests/new_tests.cpp
Tests/printutil.cpp Tests/printutil.hpp
Bloom_Filter/counting_bloom.h
Bloom_Filter/simd-block.h
Bloom_Filter/simd-block-fixed-fpp.h
PD_Filter/PD.cpp
PD_Filter/hashutil.hpp PD_Filter/hashutil.cpp
PD_Filter/hash_table.hpp PD_Filter/hash_table.cpp
PD_Filter/dict.hpp
PD_Filter/macros.h
# main.cpp
# morton/hash_util.h
# morton/block.h
# morton/bf.h
# morton/block.h
# morton/compressed_cuckoo_config.h
# morton/compressed_cuckoo_filter.h
# morton/fixed_point.h
# morton/morton_filter.h
# morton/morton_sample_configs.h
# morton/morton_util.h
# morton/test_util.h
# morton/util.h
# morton/vector_types.h
Tests/high_load.cpp Tests/high_load.hpp)
target_link_libraries(Filters OpenSSL::SSL)
编辑
现在使用添加编译选项
:
cmake_minimum_required(VERSION 3.14)
project(Filters)
set(CMAKE_CXX_STANDARD 14)
#SET(CMAKE_CXX_FLAGS "-O0 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O1 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O2 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O3 -mavx2 -mbmi2")
#include_directories(.)
include_directories(Bloom_Filter)
include_directories(PD_Filter)
include_directories(Tests)
include_directories(morton)
include_directories(xorfilter)
include_directories(cuckoofilter)
find_package(OpenSSL REQUIRED)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi2 -DNDEBUG")
add_compile_options(-fauto-inc-dec -fbranch-count-reg -fcombine-stack-adjustments -fcompare-elim -fcprop-registers -fdce -fdefer-pop -fdelayed-branch -fdse -fforward-propagate -fguess-branch-probability -fif-conversion -fif-conversion2 -finline-functions-called-once -fipa-profile -fipa-pure-const -fipa-reference -fmerge-constants -fmove-loop-invariants -fomit-frame-pointer -freorder-blocks -fshrink-wrap -fshrink-wrap-separate -fsplit-wide-types -fssa-backprop -fssa-phiopt -ftree-bit-ccp -ftree-ccp -ftree-ch -ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre -ftree-phiprop -ftree-pta -ftree-scev-cprop -ftree-sink -ftree-slsr -ftree-sra -ftree-ter -funit-at-a-time -falign-functions -falign-jumps -falign-labels -falign-loops -fcaller-saves -fcode-hoisting -fcrossjumping -fcse-follow-jumps -fcse-skip-blocks -fdelete-null-pointer-checks -fdevirtualize -fdevirtualize-speculatively -fexpensive-optimizations -fsplit-loops -fgcse -fgcse-lm -fhoist-adjacent-loads -finline-functions -finline-small-functions -findirect-inlining -fipa-bit-cp -fipa-cp -fipa-icf -fipa-ra -fipa-sra -fipa-vrp -fisolate-erroneous-paths-dereference -flra-remat -foptimize-sibling-calls -foptimize-strlen -fpartial-inlining -fpeephole2 -freorder-blocks-algorithm=stc -freorder-blocks-and-partition -freorder-functions -frerun-cse-after-loop -fschedule-insns -fschedule-insns2 -fsched-interblock -fsched-spec -fstore-merging -fstrict-aliasing -fthread-jumps -ftree-builtin-call-dce -ftree-pre -ftree-switch-conversion -ftree-tail-merge -ftree-vrp -fgcse-after-reload -fipa-cp-clone -floop-interchange -floop-unroll-and-jam -fpeel-loops -fpredictive-commoning -fsplit-loops -fsplit-paths -ftree-loop-distribution -ftree-loop-vectorize -ftree-partial-pre -ftree-slp-vectorize -funswitch-loops -fvect-cost-model -fvect-cost-model=dynamic)
add_executable(
Filters
hashutil.h
Tests/wrappers.hpp
Tests/tests.hpp Tests/tests.cpp
Tests/new_tests.hpp Tests/new_tests.cpp
Tests/printutil.cpp Tests/printutil.hpp
Bloom_Filter/counting_bloom.h
Bloom_Filter/simd-block.h
Bloom_Filter/simd-block-fixed-fpp.h
PD_Filter/PD.cpp
PD_Filter/hashutil.hpp PD_Filter/hashutil.cpp
PD_Filter/hash_table.hpp PD_Filter/hash_table.cpp
PD_Filter/dict.hpp
PD_Filter/macros.h
# main.cpp
# morton/hash_util.h
# morton/block.h
# morton/bf.h
# morton/block.h
# morton/compressed_cuckoo_config.h
# morton/compressed_cuckoo_filter.h
# morton/fixed_point.h
# morton/morton_filter.h
# morton/morton_sample_configs.h
# morton/morton_util.h
# morton/test_util.h
# morton/util.h
# morton/vector_types.h
Tests/high_load.cpp Tests/high_load.hpp)
target_link_libraries(Filters OpenSSL::SSL)
欢迎来到堆栈溢出!注意,链接的GCC页面描述了每个优化级别启用更多标志(
-O1
添加一些标志,-O2
添加更多标志,等等)。因此,您可能希望启用所有这些功能,然后逐个删除它们,以查看哪一个(或多个)会影响性能。FWIW,不鼓励手动操作CMAKE_CXX_FLAGS
变量,您应该改为使用设置这些标志。@squareskittles谢谢您的评论!这正是我想做的。问题是,即使我添加了所有的标志,我仍然看不到任何加速(可能是一个小的加速)。甚至使用添加编译选项来代替
。
cmake_minimum_required(VERSION 3.14)
project(Filters)
set(CMAKE_CXX_STANDARD 14)
#SET(CMAKE_CXX_FLAGS "-O0 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O1 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O2 -mavx2 -mbmi2")
#SET(CMAKE_CXX_FLAGS "-O3 -mavx2 -mbmi2")
#include_directories(.)
include_directories(Bloom_Filter)
include_directories(PD_Filter)
include_directories(Tests)
include_directories(morton)
include_directories(xorfilter)
include_directories(cuckoofilter)
find_package(OpenSSL REQUIRED)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi2 -DNDEBUG")
add_compile_options(-fauto-inc-dec -fbranch-count-reg -fcombine-stack-adjustments -fcompare-elim -fcprop-registers -fdce -fdefer-pop -fdelayed-branch -fdse -fforward-propagate -fguess-branch-probability -fif-conversion -fif-conversion2 -finline-functions-called-once -fipa-profile -fipa-pure-const -fipa-reference -fmerge-constants -fmove-loop-invariants -fomit-frame-pointer -freorder-blocks -fshrink-wrap -fshrink-wrap-separate -fsplit-wide-types -fssa-backprop -fssa-phiopt -ftree-bit-ccp -ftree-ccp -ftree-ch -ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre -ftree-phiprop -ftree-pta -ftree-scev-cprop -ftree-sink -ftree-slsr -ftree-sra -ftree-ter -funit-at-a-time -falign-functions -falign-jumps -falign-labels -falign-loops -fcaller-saves -fcode-hoisting -fcrossjumping -fcse-follow-jumps -fcse-skip-blocks -fdelete-null-pointer-checks -fdevirtualize -fdevirtualize-speculatively -fexpensive-optimizations -fsplit-loops -fgcse -fgcse-lm -fhoist-adjacent-loads -finline-functions -finline-small-functions -findirect-inlining -fipa-bit-cp -fipa-cp -fipa-icf -fipa-ra -fipa-sra -fipa-vrp -fisolate-erroneous-paths-dereference -flra-remat -foptimize-sibling-calls -foptimize-strlen -fpartial-inlining -fpeephole2 -freorder-blocks-algorithm=stc -freorder-blocks-and-partition -freorder-functions -frerun-cse-after-loop -fschedule-insns -fschedule-insns2 -fsched-interblock -fsched-spec -fstore-merging -fstrict-aliasing -fthread-jumps -ftree-builtin-call-dce -ftree-pre -ftree-switch-conversion -ftree-tail-merge -ftree-vrp -fgcse-after-reload -fipa-cp-clone -floop-interchange -floop-unroll-and-jam -fpeel-loops -fpredictive-commoning -fsplit-loops -fsplit-paths -ftree-loop-distribution -ftree-loop-vectorize -ftree-partial-pre -ftree-slp-vectorize -funswitch-loops -fvect-cost-model -fvect-cost-model=dynamic)
add_executable(
Filters
hashutil.h
Tests/wrappers.hpp
Tests/tests.hpp Tests/tests.cpp
Tests/new_tests.hpp Tests/new_tests.cpp
Tests/printutil.cpp Tests/printutil.hpp
Bloom_Filter/counting_bloom.h
Bloom_Filter/simd-block.h
Bloom_Filter/simd-block-fixed-fpp.h
PD_Filter/PD.cpp
PD_Filter/hashutil.hpp PD_Filter/hashutil.cpp
PD_Filter/hash_table.hpp PD_Filter/hash_table.cpp
PD_Filter/dict.hpp
PD_Filter/macros.h
# main.cpp
# morton/hash_util.h
# morton/block.h
# morton/bf.h
# morton/block.h
# morton/compressed_cuckoo_config.h
# morton/compressed_cuckoo_filter.h
# morton/fixed_point.h
# morton/morton_filter.h
# morton/morton_sample_configs.h
# morton/morton_util.h
# morton/test_util.h
# morton/util.h
# morton/vector_types.h
Tests/high_load.cpp Tests/high_load.hpp)
target_link_libraries(Filters OpenSSL::SSL)