matrix: work around instruction selection bug in clang

2017-10-10 14:09:07 +11:00 · 2017-10-10 14:09:07 +11:00 · 2e5d69c095
commit 2e5d69c095
parent 720a45deb8
2 changed files with 25 additions and 24 deletions
--- a/matrix.hpp
+++ b/matrix.hpp
@@ -216,7 +216,31 @@ namespace util {
    >
    constexpr
    matrix<R1,C2,T>
-    operator* (const matrix<R1,C1,T>&, const matrix<R2,C2,T>&);
+    operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b) noexcept
+    {
+        static_assert (R2 == C1);
+
+        matrix<R1,C2,T> res {0};
+
+        // TODO: iterating over r,c rather than c,r will cause an ICE in clang
+        // (clang#xxxx): 'X86 DAG->DAG Instruction Selection'.
+        //
+        // this is likely related to gold and LTO support. for the time being
+        // we iterate over c,r instead, because that ordering appears to
+        // confuse the optimiser sufficiently to avoid the crash. :(
+        for (size_t c = 0; c < C2; ++c) {
+            for (size_t r = 0; r < R1; ++r) {
+                T accum{0};
+
+                for (size_t i = 0; i < R2; ++i)
+                    accum += a[r][i] * b[i][c];
+
+                res[r][c] = accum;
+            }
+        }
+
+        return res;
+    }


    //-------------------------------------------------------------------------
--- a/matrix.ipp
+++ b/matrix.ipp
@@ -99,29 +99,6 @@ MATRIX_SCALAR_OP(-)
 #undef MATRIX_SCALAR_OP


-///////////////////////////////////////////////////////////////////////////////
-template <
-    std::size_t R1, std::size_t C1,
-    std::size_t R2, std::size_t C2,
-    typename T
->
-constexpr
-util::matrix<R1,C2,T>
-util::operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b)
-{
-    static_assert (R2 == C1);
-
-    matrix<R1,C2,T> res {0};
-
-    for (size_t r = 0; r < R1; ++r)
-        for (size_t c = 0; c < C2; ++c)
-            for (size_t i = 0; i < R2; ++i)
-                res[r][c] += a[r][i] * b[i][c];
-
-    return res;
-}
-
-
 ///////////////////////////////////////////////////////////////////////////////
 template <size_t Rows, size_t Cols, typename T>
 constexpr