diff --git a/m4/nc_cxx.m4 b/m4/nc_cxx.m4 index 26082271..b7faa3b7 100644 --- a/m4/nc_cxx.m4 +++ b/m4/nc_cxx.m4 @@ -23,4 +23,5 @@ AC_DEFUN([NC_CXX],[ [])], [], [AC_DEFINE([final], [], [Pretend about final keyword support])]) + AX_APPEND_COMPILE_FLAGS([-fvisibility=hidden], [], [-Werror]) ]) diff --git a/m4/nc_optimisation.m4 b/m4/nc_optimisation.m4 index 10c8bc95..194c5272 100644 --- a/m4/nc_optimisation.m4 +++ b/m4/nc_optimisation.m4 @@ -54,6 +54,8 @@ AC_DEFUN([NC_OPTIMISATION],[ ) AX_APPEND_COMPILE_FLAGS([-pipe]) + + # base instruction set requirements for x86 AX_APPEND_COMPILE_FLAGS([-mfpmath=sse], [], [-Werror]) AX_APPEND_COMPILE_FLAGS([-msahf], [], [-Werror]) @@ -61,30 +63,34 @@ AC_DEFUN([NC_OPTIMISATION],[ ## Enable aggressive code generation optimisations AS_IF([test "x$enable_debugging" != "xyes"], [ - AX_APPEND_COMPILE_FLAGS([-ftree-loop-distribute-patterns], [], [-Werror]) - AX_APPEND_COMPILE_FLAGS([-ftree-loop-if-convert-stores], [], [-Werror]) + # gcc vectorisation AX_APPEND_COMPILE_FLAGS([-ftree-vectorize], [], [-Werror]) - AX_APPEND_COMPILE_FLAGS([-funsafe-loop-optimizations], [], [-Werror]) - # gcc >= 4.8 defaults to enabling stack-protector, we care more about - # performance than security. 
- AX_APPEND_COMPILE_FLAGS([-fno-stack-protector], [], [-Werror]) - - AX_APPEND_COMPILE_FLAGS([-ftree-loop-linear], [], [-Werror]) - AX_APPEND_COMPILE_FLAGS([-floop-interchange], [], [-Werror]) + # clang vectorisation + AX_APPEND_COMPILE_FLAGS([-fvectorize], [], [-Werror]) + AX_APPEND_COMPILE_FLAGS([-fslp-vectorize], [], [-Werror]) + AX_APPEND_COMPILE_FLAGS([-fslp-vectorize-aggressive], [], [-Werror]) + # loop hoisting/distribution + AX_APPEND_COMPILE_FLAGS([-ftree-loop-distribute-patterns], [], [-Werror]) AX_APPEND_COMPILE_FLAGS([-ftree-loop-distribution], [], [-Werror]) - AX_APPEND_COMPILE_FLAGS([-ftree-loop-distribute-patterns], [], [-Werror]) - AX_APPEND_COMPILE_FLAGS([-ftree-vectorize], [], [-Werror]) + AX_APPEND_COMPILE_FLAGS([-ftree-loop-if-convert-stores], [], [-Werror]) + AX_APPEND_COMPILE_FLAGS([-ftree-loop-linear], [], [-Werror]) + + AX_APPEND_COMPILE_FLAGS([-funsafe-loop-optimizations], [], [-Werror]) AX_APPEND_COMPILE_FLAGS([-floop-interchange], [], [-Werror]) - + # safety removal for performance + AX_APPEND_COMPILE_FLAGS([-fno-stack-protector], [], [-Werror]) ]) ##------------------------------------------------------------------------- ## Enable code size optimisations (that don't impact performance) ## Note: we assume CXX, and that CXXLINK is g++ not ld, hence the -Wl opt AS_IF([test "x$enable_debugging" != "xyes"], [ + AX_APPEND_COMPILE_FLAGS([-fdevirtualize], [], [-Werror]) + AX_APPEND_COMPILE_FLAGS([-fdevirtualize-speculatively], [], [-Werror]) + AX_CHECK_LINK_FLAG([-Wl,--gc-sections], [ AX_APPEND_COMPILE_FLAGS([-fdata-sections], [], [-Werror]) AX_APPEND_COMPILE_FLAGS([-ffunction-sections], [], [-Werror])