Danny Robson
54f0855312
This means that we no longer need to modify the module path for CMake before including the root script.
195 lines
6.5 KiB
CMake
195 lines
6.5 KiB
CMake
###############################################################################
|
|
# Include guard: the first inclusion records a marker variable; any later
# inclusion that can still see that variable returns immediately.
if (NOT __nc_optimisation)
    set (__nc_optimisation TRUE)
else ()
    return ()
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Load the sibling modules that sit next to this file, in the same order as
# before: compile_flag, link_flag, canonical_host.
# NOTE(review): these presumably provide append_compile_flag(),
# append_link_flag()/check_link_flag() and canonical_host() used below --
# confirm against the modules themselves.
foreach (__nc_optimisation_module compile_flag link_flag canonical_host)
    include ("${CMAKE_CURRENT_LIST_DIR}/${__nc_optimisation_module}.cmake")
endforeach ()
|
|
|
|
|
|
###############################################################################
|
|
# User-facing switch for link-time optimisation; disabled unless requested.
option (LTO "enable link-time optimisation" OFF)


##-----------------------------------------------------------------------------
## Enable link-time optimisation.
##
## The original author found CMake's INTERPROCEDURAL_OPTIMIZATION property to
## be silently ignored for almost all compilers, so the necessary flags are
## added by hand here instead.
if (LTO)
    # Link flags are added first, otherwise the linker may not recognise the
    # object format.
    foreach (__nc_lto_link_flag "-fuse-linker-plugin" "-flto")
        append_link_flag ("${__nc_lto_link_flag}")
    endforeach ()

    foreach (__nc_lto_compile_flag
        # Turn LTO on for compilation, while trying hard to avoid situations
        # where regular/fat objects might be used by accident.
        "-flto"
        "-fno-fat-lto-objects"
        "-flto-jobs=0"

        # Try to squeeze some extra diagnostics out of LTO.
        "-flto-odr-type-merging"

        # LTO-specific optimisation. Debug/Release is deliberately not
        # checked here because LTO is heavyweight regardless.
        "-fdevirtualize-at-ltrans"
    )
        append_compile_flag ("${__nc_lto_compile_flag}")
    endforeach ()

    # GCC: a more aggressive whole-program style of LTO is available, but it
    # is unclear whether it _actually_ buys performance or merely slows down
    # linking (by a substantial factor), so it stays disabled.
    #append_compile_flag("-flto-partition=none")

    # With GCC the gcc-{ar,nm,ranlib} wrappers are probably needed so that
    # plugin support works. Compilation tests would be the ideal way to
    # decide this, but they are painful to write in CMake.
    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
        set (CMAKE_AR     "gcc-ar")
        set (CMAKE_NM     "gcc-nm")
        set (CMAKE_RANLIB "gcc-ranlib")
    endif ()
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# User-facing switch: keep the frame pointer even in optimised builds.
# Disabled by default.
option (FRAMEPOINTER "retain the framepointer even if optimising" OFF)


##-----------------------------------------------------------------------------
# When requested, ask the compiler not to omit the frame pointer.
if (FRAMEPOINTER)
    append_compile_flag ("-fno-omit-frame-pointer")
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Query the host description.
# NOTE(review): canonical_host() is defined in canonical_host.cmake and is
# presumed to set `host_cpu` -- confirm against that module.
canonical_host ()


##-----------------------------------------------------------------------------
## Select architecture-specific code generation flags.
##
## `host_cpu` is expanded inside quotes so that an empty or undefined value
## falls through to the final `else ()` branch (and its warning) instead of
## producing a malformed if() expression.
if ("${host_cpu}" STREQUAL "x86_64")
    # Explicitly qualify the CPU as the minimum viable architecture, but tune
    # for common platforms. This might prevent some invalid instructions
    # being emitted.
    append_compile_flag (-march=x86-64)
    append_compile_flag (-mtune=generic)

    # Instruction set extensions that are enabled on top of the baseline.
    append_compile_flag (-msse)
    append_compile_flag (-msse2)
    append_compile_flag (-mssse3)
    append_compile_flag (-msse3)
    append_compile_flag (-mavx)

    # AVX2 is explicitly disabled.
    append_compile_flag (-mno-avx2)

    # These instructions should always be available on reasonable platforms.
    # They're almost 20 years old at this point.
    #append_compile_flag(-msse)
    #append_compile_flag(-msse2)
    #append_compile_flag(-mssse3)
    #append_compile_flag(-msahf)
elseif ("${host_cpu}" STREQUAL "i686")
    append_compile_flag (-march=prescott)
    append_compile_flag (-mtune=generic)
    append_compile_flag (-mcmov)
    append_compile_flag (-mfpmath=sse)
else ()
    # Any other (or undetermined) CPU: leave code generation at the
    # compiler's defaults and tell the user.
    message (WARNING "Unknown architecture. Not attempting performance options")
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Extra optimisation flags for every build type that does not match "Debug".
if (NOT CMAKE_BUILD_TYPE MATCHES Debug)
    # Unsafe maths options are kept for reference but are not enabled.
    #append_compile_flag(-funsafe-math-optimizations)
    #append_compile_flag(-ffast-math)
    #append_compile_flag(-fno-finite-math-only)

    foreach (__nc_release_flag
        # vectorisation
        -ftree-vectorize
        -fvectorize
        -fslp-vectorize

        # loop hoisting/distribution
        -floop-nest-optimize

        -ftree-loop-distribution
        -ftree-loop-distribute-patterns
        -ftree-loop-im
        -ftree-loop-if-convert-stores

        -fivopts

        -funsafe-loop-optimizations
        -floop-interchange

        -fpredictive-commoning
        -funswitch-loops

        # GCC 8.2 encounters an ICE in LTO linking with ipa-pta enabled.
        # NOTE(review): the flag is nevertheless requested here -- confirm
        # whether it should be skipped when LTO is on.
        -fipa-pta

        # safety removal for performance
        -fno-stack-protector
    )
        append_compile_flag ("${__nc_release_flag}")
    endforeach ()
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Devirtualisation and dead-section removal for every build type that does not
# match "Debug".
if (NOT CMAKE_BUILD_TYPE MATCHES Debug)
    append_compile_flag (-fdevirtualize)
    append_compile_flag (-fdevirtualize-speculatively)

    # Probe whether the linker accepts --gc-sections, and only then split
    # data/functions into their own sections so that unused ones can be
    # discarded at link time.
    #
    # The probe's result variable must be the very same name that is tested
    # below. CMake arguments are separated by whitespace, not commas, so a
    # trailing comma would become part of the variable name and the guard
    # would never see the result.
    #
    # NOTE(review): assumes check_link_flag(<result-variable> <flag>), which
    # is the argument order this call has always used -- confirm against
    # link_flag.cmake.
    check_link_flag (TEST_LD_GC_SECTIONS "-Wl,--gc-sections")
    if (TEST_LD_GC_SECTIONS)
        append_compile_flag (-fdata-sections)
        append_compile_flag (-ffunction-sections)
        append_link_flag ("-Wl,--gc-sections")
    endif ()
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Per-build-type optimisation level, definitions and hardening.
if (CMAKE_BUILD_TYPE MATCHES Debug)
    # Debug builds: enable project debugging hooks and the checked libstdc++
    # containers.
    add_definitions (-DENABLE_DEBUGGING -D_GLIBCXX_DEBUG)

    # Don't make any commits that use -O0 by default. Instead, either add an
    # argument that allows one to choose, or temporarily switch the lines
    # below locally. -O0 is just too slow for computationally heavy projects.
    # NOTE(review): -O0 is the active line here, which contradicts the advice
    # above -- confirm which of the two is intended.
    #append_compile_flag(-Og)
    append_compile_flag ("-O0")

    # Warnings are fatal in Debug builds.
    append_compile_flag ("-Werror")

    # Stack protection tends to cause segfaults in the generated binary,
    # occurring in the standard library under msys2 and related compilers.
    #
    # It is easily shown by simply creating a local std::string. Rather than
    # get to the root cause we just disable the protection under Windows.
    if (NOT WIN32)
        append_compile_flag ("-fstack-protector")
    endif ()
else ()
    # Everything else: optimise, drop RTTI, and tell the code about both.
    append_compile_flag ("-O2")
    append_compile_flag ("-fno-rtti")
    add_definitions (-DNO_RTTI -DNDEBUG)
endif ()
|
|
|
|
|
|
###############################################################################
|
|
# Debug information is requested for every build type.
append_compile_flag ("-g")

# gcc#: -ggdb tends to trigger an ICE in 'trunc_int_for_mode' under GCC with
# libcruft-util's coord code. It is not strictly necessary, so it stays
# disabled for now.
#append_compile_flag(-ggdb)
|