cmake/nc_optimisation.cmake

231 lines
7.7 KiB
CMake

###############################################################################
# Include guard: stop processing this module if it has already been included
# from a scope where the marker variable below is visible.
if (__nc_optimisation)
    return ()
endif ()

set (__nc_optimisation TRUE)
###############################################################################
include ("${CMAKE_CURRENT_LIST_DIR}/compile_flag.cmake")
include ("${CMAKE_CURRENT_LIST_DIR}/link_flag.cmake")
include ("${CMAKE_CURRENT_LIST_DIR}/canonical_host.cmake")
###############################################################################
option(LTO "enable link-time optimisation" OFF)


##-----------------------------------------------------------------------------
## Link-time optimisation.
##
## CMake's own INTERPROCEDURAL_OPTIMIZATION property has proven to be silently
## ignored for most compilers, so it is not relied upon here; the necessary
## compiler and linker flags are supplied by hand instead.
if (LTO)
    # The linker flags are added before the compiler flags, otherwise the
    # linker may fail to recognise the object format.
    append_link_flag ("-fuse-linker-plugin")

    # HACK: 'auto' is likely to oversubscribe the machine, but it is the
    # least painful way of keeping GCC quiet.
    append_first_link_flag ("-flto=auto" "-flto")

    # Turn on LTO for compilation too, taking care not to fall back to
    # regular/fat objects by accident.
    #
    # * GCC 12 warns about serial compilation of LTRANS jobs unless an
    #   explicit parameter is given: an integer, 'auto', or 'jobserver'.
    # * Clang only accepts 'full', 'thin', or no argument at all.
    #
    # HACK: the same oversubscription caveat as the link flag above applies
    # to 'auto' here.
    append_first_compile_flag ("-flto=auto" "-flto")
    append_compile_flag ("-fno-fat-lto-objects")
    append_compile_flag ("-flto-jobs=0")

    # LTO can surface a few additional diagnostics; ask for them.
    append_compile_flag ("-flto-odr-type-merging")

    # GCC: a more aggressive, whole-program style of LTO is available via the
    # flag below. It is left disabled because it is unclear whether it buys
    # any real performance, and it slows linking by a substantial factor.
    #append_compile_flag("-flto-partition=none")

    # LTO-specific optimisation flags. No distinction is made between Debug
    # and Release here because LTO is heavyweight regardless.
    append_compile_flag ("-fdevirtualize-at-ltrans")

    # GCC's LTO plugin generally requires the gcc-{ar,nm,ranlib} wrappers.
    # A proper compilation test would be preferable, but that is painful to
    # do in CMake, so the tool names are simply overridden.
    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
        set (CMAKE_AR     "gcc-ar")
        set (CMAKE_NM     "gcc-nm")
        set (CMAKE_RANLIB "gcc-ranlib")
    endif ()
endif ()
###############################################################################
option(FRAMEPOINTER "retain the framepointer even if optimising" OFF)


##-----------------------------------------------------------------------------
## When requested, ask the compiler not to omit the frame pointer.
if (FRAMEPOINTER)
    append_compile_flag ("-fno-omit-frame-pointer")
endif ()
###############################################################################
# Query the host description. This is expected to populate `host_cpu`
# (presumably defined by canonical_host.cmake -- confirm against that module).
canonical_host()


##-----------------------------------------------------------------------------
## Select architecture-specific code generation flags.
##
## `host_cpu` is always expanded inside quotes: an empty or unset value would
## otherwise turn the condition into `if ( STREQUAL "x86_64")`, which is a
## hard configure-time syntax error rather than a fall-through to the warning
## branch below.
if ("${host_cpu}" STREQUAL "x86_64")
    # Explicitly qualify the CPU as the minimum viable architecture, but tune
    # for common platforms. This might prevent some invalid instructions
    # being emitted.
    append_compile_flag(-march=x86-64)
    append_compile_flag(-mtune=generic)

    # The SSE family should always be available on reasonable platforms;
    # these instructions are almost 20 years old at this point.
    append_compile_flag(-msse)
    append_compile_flag(-msse2)
    append_compile_flag(-msse3)
    append_compile_flag(-mssse3)

    # NOTE(review): -mavx raises the effective baseline above plain x86-64;
    # confirm that every deployment target actually supports AVX.
    append_compile_flag(-mavx)
    append_compile_flag(-mno-avx2)

    # -msahf was previously listed here as well but is currently disabled.
elseif ("${host_cpu}" STREQUAL "i686")
    append_compile_flag(-march=prescott)
    append_compile_flag(-mtune=generic)
    append_compile_flag(-mcmov)
    append_compile_flag(-mfpmath=sse)
else ()
    message (WARNING "Unknown architecture. Not attempting performance options")
endif ()
###############################################################################
# General optimisation flags. These are applied to every build type whose
# name does not match "Debug".
if (NOT CMAKE_BUILD_TYPE MATCHES Debug)
    # Unsafe maths options; all currently disabled:
    #   -funsafe-math-optimizations
    #   -ffast-math
    #   -fno-finite-math-only

    # Vectorisation
    append_compile_flag (-ftree-vectorize)
    append_compile_flag (-fvectorize)
    append_compile_flag (-fslp-vectorize)

    # Loop hoisting/distribution
    append_compile_flag (-floop-nest-optimize)
    append_compile_flag (-ftree-loop-distribution)
    append_compile_flag (-ftree-loop-distribute-patterns)
    append_compile_flag (-ftree-loop-im)
    append_compile_flag (-ftree-loop-if-convert-stores)
    append_compile_flag (-fivopts)
    append_compile_flag (-funsafe-loop-optimizations)
    append_compile_flag (-floop-interchange)
    append_compile_flag (-fpredictive-commoning)
    append_compile_flag (-funswitch-loops)

    # GCC 8.2 encounters an ICE in LTO linking with ipa-pta enabled.
    # NOTE(review): the flag is nevertheless added unconditionally here;
    # confirm whether it should be skipped when LTO is on.
    append_compile_flag (-fipa-pta)

    # Trade safety for performance
    append_compile_flag (-fno-stack-protector)
endif ()
###############################################################################
# Devirtualisation and unused-section removal, applied to every build type
# whose name does not match "Debug".
if (NOT CMAKE_BUILD_TYPE MATCHES Debug)
    append_compile_flag(-fdevirtualize)
    append_compile_flag(-fdevirtualize-speculatively)

    # Only place functions/data in their own sections when the linker is able
    # to garbage collect the unused ones.
    #
    # The result variable given to check_link_flag must be exactly the name
    # tested afterwards. CMake arguments are separated by whitespace, not
    # commas, so a trailing comma would become part of the variable name and
    # the test below would never succeed.
    check_link_flag(TEST_LD_GC_SECTIONS "-Wl,--gc-sections")
    if (TEST_LD_GC_SECTIONS)
        append_compile_flag(-fdata-sections)
        append_compile_flag(-ffunction-sections)
        append_link_flag("-Wl,--gc-sections")
    endif ()
endif ()
###############################################################################
# Runtime debugging defaults to ON for build types matching "Debug" and to OFF
# for everything else.
set (_DEFAULT_RUNTIME_DEBUGGING OFF)
if (CMAKE_BUILD_TYPE MATCHES Debug)
    set (_DEFAULT_RUNTIME_DEBUGGING ON)
endif ()

option (
    RUNTIME_DEBUGGING
    "enable debugging features that impact runtime and ABI"
    ${_DEFAULT_RUNTIME_DEBUGGING}
)

# Refuse to configure if runtime debugging has been requested for a build
# type that does not match "Debug".
if (RUNTIME_DEBUGGING)
    if (NOT CMAKE_BUILD_TYPE MATCHES Debug)
        message (FATAL_ERROR "RUNTIME_DEBUGGING is only supported under a DEBUG build")
    endif ()
endif ()
# Per-build-type optimisation level, warnings policy, and preprocessor
# definitions.
if (CMAKE_BUILD_TYPE MATCHES Debug)
    add_definitions (-DENABLE_DEBUGGING)

    # Switch on the checked libstdc++ containers when runtime debugging has
    # been requested and libstdc++ is the selected standard library.
    if (RUNTIME_DEBUGGING AND "${STDLIB}" STREQUAL "libstdc++")
        add_definitions (-D_GLIBCXX_DEBUG)
    endif ()

    # Do not commit a different default optimisation level here. Either add
    # an argument that lets the user choose, or swap the two lines below in
    # a local checkout only. -O0 is simply too slow for computationally
    # heavy projects.
    #append_compile_flag(-Og)
    append_compile_flag (-O0)
    append_compile_flag (-Werror)

    # Under msys2 and related compilers stack protection tends to produce
    # binaries that segfault inside the standard library; merely creating a
    # local std::string is enough to show it. Rather than chase the root
    # cause, the protection is simply left off on Windows.
    if (NOT WIN32)
        append_compile_flag (-fstack-protector)
    endif ()
else ()
    append_compile_flag (-O2)
    append_compile_flag (-fno-rtti)

    add_definitions (-DNO_RTTI)
    add_definitions (-DNDEBUG)
endif ()
###############################################################################
# Debug symbols are requested regardless of the build type.
append_compile_flag ("-g")

# -ggdb is left disabled: under GCC it tends to trigger an ICE in
# 'trunc_int_for_mode' when compiling libcruft-util's coord code, and it is
# not strictly necessary.
#append_compile_flag(-ggdb)