matrix: work around instruction selection bug in clang
This commit is contained in:
parent
720a45deb8
commit
2e5d69c095
26
matrix.hpp
26
matrix.hpp
@ -216,7 +216,31 @@ namespace util {
|
|||||||
>
|
>
|
||||||
constexpr
|
constexpr
|
||||||
matrix<R1,C2,T>
|
matrix<R1,C2,T>
|
||||||
operator* (const matrix<R1,C1,T>&, const matrix<R2,C2,T>&);
|
operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b) noexcept
|
||||||
|
{
|
||||||
|
static_assert (R2 == C1);
|
||||||
|
|
||||||
|
matrix<R1,C2,T> res {0};
|
||||||
|
|
||||||
|
// TODO: iterating over r,c rather than c,r will cause an ICE with
|
||||||
|
// clang#xxxx: 'X86 DAG->DAG Instruction Selection'.
|
||||||
|
//
|
||||||
|
// this is likely related to gold and LTO support. for the time being
|
||||||
|
// we switch the orders because it appears to confuse the optimiser
|
||||||
|
// sufficiently. :(
|
||||||
|
for (size_t c = 0; c < C2; ++c) {
|
||||||
|
for (size_t r = 0; r < R1; ++r) {
|
||||||
|
T accum{0};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < R2; ++i)
|
||||||
|
accum += a[r][i] * b[i][c];
|
||||||
|
|
||||||
|
res[r][c] = accum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
|
23
matrix.ipp
23
matrix.ipp
@ -99,29 +99,6 @@ MATRIX_SCALAR_OP(-)
|
|||||||
#undef MATRIX_SCALAR_OP
|
#undef MATRIX_SCALAR_OP
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
template <
|
|
||||||
std::size_t R1, std::size_t C1,
|
|
||||||
std::size_t R2, std::size_t C2,
|
|
||||||
typename T
|
|
||||||
>
|
|
||||||
constexpr
|
|
||||||
util::matrix<R1,C2,T>
|
|
||||||
util::operator* (const matrix<R1,C1,T> &a, const matrix<R2,C2,T> &b)
|
|
||||||
{
|
|
||||||
static_assert (R2 == C1);
|
|
||||||
|
|
||||||
matrix<R1,C2,T> res {0};
|
|
||||||
|
|
||||||
for (size_t r = 0; r < R1; ++r)
|
|
||||||
for (size_t c = 0; c < C2; ++c)
|
|
||||||
for (size_t i = 0; i < R2; ++i)
|
|
||||||
res[r][c] += a[r][i] * b[i][c];
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
template <size_t Rows, size_t Cols, typename T>
|
template <size_t Rows, size_t Cols, typename T>
|
||||||
constexpr
|
constexpr
|
||||||
|
Loading…
Reference in New Issue
Block a user