192 lines
6.3 KiB
CMake
192 lines
6.3 KiB
CMake
###############################################################################
# Include guard. If this module has already been processed in the current
# variable scope, stop immediately so the flags below are not appended twice.
if (__nc_optimisation)
    return ()
endif ()

set (__nc_optimisation TRUE)

|
###############################################################################
# Helper modules this file relies on. The modules themselves are not visible
# from here, so the pairings noted below are assumptions to confirm.
include (compile_flag)      # presumably provides append_compile_flag
include (link_flag)         # presumably provides append_link_flag / check_link_flag
include (canonical_host)    # presumably provides canonical_host

|
###############################################################################
# LTO: opt-in switch for link-time optimisation. Off by default.
option(LTO "enable link-time optimisation" OFF)


##-----------------------------------------------------------------------------
## Enable link-time optimisation.
##
## The INTERPROCEDURAL_OPTIMIZATION property is silently ignored for almost
## all compilers, so we have to make up for its deficiencies ourselves by
## appending the relevant flags by hand.
if (LTO)
    # Add the linker flags first otherwise the linker may not recognise the
    # object format
    append_link_flag("-fuse-linker-plugin")
    append_link_flag("-flto")

    # Enable LTO on the compilation side, but try very hard to avoid
    # situations where we may accidentally use regular/fat objects.
    append_compile_flag("-flto")
    append_compile_flag("-fno-fat-lto-objects")

    # Try to squeeze out some more diagnostics via LTO
    append_compile_flag("-flto-odr-type-merging")

    # GCC: Attempt to use a more aggressive whole-program style of LTO
    append_compile_flag("-flto-partition=none")

    # Throw in some optimisation flags that are LTO specific. We don't
    # particularly care about checking Debug/Release here because LTO is
    # pretty heavyweight anyway.
    append_compile_flag("-fdevirtualize-at-ltrans")

    # If we're using GCC we probably need to use gcc-{ar,nm,ranlib} so that
    # plugin support works.
    #
    # Prefer the wrappers CMake located for the active compiler: they carry
    # the right path, version suffix and cross prefix. Fall back to the bare
    # names when CMake did not find them (the variable is then unset or
    # holds a *-NOTFOUND value, both of which are false in if()).
    # CMake exposes no equivalent variable for nm, so that name stays fixed.
    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
        if (CMAKE_CXX_COMPILER_AR)
            set (CMAKE_AR "${CMAKE_CXX_COMPILER_AR}")
        else ()
            set (CMAKE_AR "gcc-ar")
        endif ()

        set (CMAKE_NM "gcc-nm")

        if (CMAKE_CXX_COMPILER_RANLIB)
            set (CMAKE_RANLIB "${CMAKE_CXX_COMPILER_RANLIB}")
        else ()
            set (CMAKE_RANLIB "gcc-ranlib")
        endif ()
    endif ()
endif ()

|
###############################################################################
# FRAMEPOINTER: opt-in switch, off by default, for keeping the frame pointer
# in optimised builds.
option(FRAMEPOINTER "retain the framepointer even if optimising" OFF)


##-----------------------------------------------------------------------------
# When requested, ask the compiler not to omit the frame pointer.
if (FRAMEPOINTER)
    append_compile_flag("-fno-omit-frame-pointer")
endif ()

|
###############################################################################
# Identify the host. The branches below read `host_cpu`, which is presumably
# set by this call -- confirm against the canonical_host module.
canonical_host()


##-----------------------------------------------------------------------------
# Select baseline architecture flags for the detected CPU.
#
# The expansion of host_cpu is quoted so that an empty or undefined value
# falls through to the warning branch rather than producing a malformed
# if() expression.
if ("${host_cpu}" STREQUAL "x86_64")
    # Explicitly qualify the CPU as the minimum viable architecture, but tune
    # for common platforms. This might prevent some invalid instructions being
    # emitted.
    append_compile_flag(-march=x86-64)
    append_compile_flag(-mtune=generic)

    append_compile_flag(-msse)
    append_compile_flag(-msse2)
    append_compile_flag(-mssse3)

    # NOTE(review): -mssse3 is requested above and -mno-sse3 directly below.
    # SSSE3 builds on SSE3, so these two look contradictory; confirm which
    # instruction set is actually intended before changing either.
    append_compile_flag(-mno-sse3)
    append_compile_flag(-mno-avx)
    append_compile_flag(-mno-avx2)

    # These instructions should always be available on reasonable platforms.
    # They're almost 20 years old at this point.
    #append_compile_flag(-msse)
    #append_compile_flag(-msse2)
    # append_compile_flag(-mssse3)
    # append_compile_flag(-msahf)
elseif ("${host_cpu}" STREQUAL "i686")
    append_compile_flag(-march=prescott)
    append_compile_flag(-mtune=generic)
    append_compile_flag(-mcmov)
    append_compile_flag(-mfpmath=sse)
else ()
    message (WARNING "Unknown architecture. Not attempting performance options")
endif ()

|
###############################################################################
# Extra optimisation passes. Applied whenever CMAKE_BUILD_TYPE does not match
# "Debug"; Debug builds get none of them.
if (NOT (CMAKE_BUILD_TYPE MATCHES Debug))
    # unsafe maths (all currently disabled)
    # append_compile_flag(-funsafe-math-optimizations)
    # append_compile_flag(-ffast-math)
    # append_compile_flag(-fno-finite-math-only)

    # vectorisation
    append_compile_flag(-ftree-vectorize)
    append_compile_flag(-fvectorize)
    append_compile_flag(-fslp-vectorize)

    # loop hoisting/distribution
    append_compile_flag(-floop-nest-optimize)

    append_compile_flag(-ftree-loop-distribution)
    append_compile_flag(-ftree-loop-distribute-patterns)
    append_compile_flag(-ftree-loop-im)
    append_compile_flag(-ftree-loop-if-convert-stores)

    append_compile_flag(-fivopts)

    append_compile_flag(-funsafe-loop-optimizations)
    append_compile_flag(-floop-interchange)

    append_compile_flag(-fpredictive-commoning)
    append_compile_flag(-funswitch-loops)

    # GCC 8.2 encounters an ICE in LTO linking with ipa-pta enabled
    # NOTE(review): the flag is nevertheless appended unconditionally here;
    # confirm whether it should be skipped when LTO is on.
    append_compile_flag (-fipa-pta)

    # safety removal for performance
    append_compile_flag(-fno-stack-protector)
endif ()

|
###############################################################################
# Devirtualisation and unused-section removal. Applied whenever
# CMAKE_BUILD_TYPE does not match "Debug".
if (NOT (CMAKE_BUILD_TYPE MATCHES Debug))
    append_compile_flag(-fdevirtualize)
    append_compile_flag(-fdevirtualize-speculatively)

    # Only split code and data into separate sections when the linker accepts
    # --gc-sections, since the split is pointless without it.
    #
    # The variable tested by if() must be the one handed to check_link_flag.
    # CMake separates arguments with whitespace only, so a comma after the
    # variable name would become part of that name.
    # Assumes check_link_flag stores its result in the variable named by its
    # first argument -- confirm against the link_flag module.
    check_link_flag(TEST_LD_GC_SECTIONS "-Wl,--gc-sections")
    if (TEST_LD_GC_SECTIONS)
        append_compile_flag(-fdata-sections)
        append_compile_flag(-ffunction-sections)
        append_link_flag("-Wl,--gc-sections")
    endif ()
endif ()

|
###############################################################################
# Baseline definitions and optimisation level, chosen by whether
# CMAKE_BUILD_TYPE matches "Debug".
if (CMAKE_BUILD_TYPE MATCHES Debug)
    add_definitions(-DENABLE_DEBUGGING -D_GLIBCXX_DEBUG)

    # Don't make any commits that use -O0 by default. Instead, either add an
    # argument that allows one to choose, or temporarily switch the lines
    # below locally. -O0 is just too slow for computationally heavy projects.
    # NOTE(review): -O0 is the line that is currently active, which is at odds
    # with the advice above; confirm which one is intended.
    #append_compile_flag(-Og)
    append_compile_flag(-O0)

    append_compile_flag(-Werror)

    # Stack protection tends to cause segfaults in the generated binary,
    # occurring in the standard library under msys2 and related compilers.
    #
    # It's easily shown by simply creating a local std::string. Rather than
    # get to the root cause we just disable the protection under Windows.
    if (NOT WIN32)
        append_compile_flag(-fstack-protector)
    endif ()
else ()
    append_compile_flag(-O2)
    append_compile_flag(-fno-rtti)
    add_definitions(-DNO_RTTI -DNDEBUG)
endif ()

|
###############################################################################
# Debug information is requested unconditionally, whatever the build type.
append_compile_flag(-g)

# gcc#: -ggdb tends to trigger an ICE in 'trunc_int_for_mode' under GCC with
# libcruft-util's coord code. It's not strictly necessary so it stays disabled
# for now.
#append_compile_flag(-ggdb)