From 2e5d69c095d1f3c14211806c1b396c001e36e68d Mon Sep 17 00:00:00 2001
From: Danny Robson <danny@nerdcruft.net>
Date: Tue, 10 Oct 2017 14:09:07 +1100
Subject: [PATCH] matrix: work around instruction selection bug in clang

---
 matrix.hpp | 26 +++++++++++++++++++++++++-
 matrix.ipp | 23 -----------------------
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/matrix.hpp b/matrix.hpp
index 270f6742..c3cc4f77 100644
--- a/matrix.hpp
+++ b/matrix.hpp
@@ -216,7 +216,31 @@ namespace util {
     >
     constexpr
     matrix<R1,C2,T>
-    operator* (const matrix<R1,C1,T>&, const matrix<R2,C2,T>&);
+    operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b) noexcept
+    {
+        static_assert (R2 == C1);
+
+        matrix<R1,C2,T> res {0};
+
+        // TODO: iterating over r,c rather than c,r triggers an internal
+        // compiler error (clang#xxxx) in 'X86 DAG->DAG Instruction Selection'.
+        //
+        // this is likely related to gold and LTO support. for the time being
+        // we swap the loop order because that appears to confuse the optimiser
+        // sufficiently to avoid the crash. :(
+        for (size_t c = 0; c < C2; ++c) {
+            for (size_t r = 0; r < R1; ++r) {
+                T accum{0};
+
+                for (size_t i = 0; i < R2; ++i)
+                    accum += a[r][i] * b[i][c];
+
+                res[r][c] = accum;
+            }
+        }
+
+        return res;
+    }
 
 
     //-------------------------------------------------------------------------
diff --git a/matrix.ipp b/matrix.ipp
index 7a2e0b69..e934426e 100644
--- a/matrix.ipp
+++ b/matrix.ipp
@@ -99,29 +99,6 @@ MATRIX_SCALAR_OP(-)
 #undef MATRIX_SCALAR_OP
 
 
-///////////////////////////////////////////////////////////////////////////////
-template <
-    std::size_t R1, std::size_t C1,
-    std::size_t R2, std::size_t C2,
-    typename T
->
-constexpr
-util::matrix<R1,C2,T>
-util::operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b)
-{
-    static_assert (R2 == C1);
-
-    matrix<R1,C2,T> res {0};
-
-    for (size_t r = 0; r < R1; ++r)
-        for (size_t c = 0; c < C2; ++c)
-            for (size_t i = 0; i < R2; ++i)
-                res[r][c] += a[r][i] * b[i][c];
-
-    return res;
-}
-
-
 ///////////////////////////////////////////////////////////////////////////////
 template <size_t Rows, size_t Cols, typename T>
 constexpr