From b307ab935d8bfae78f0152602a5a1dbe18d80a49 Mon Sep 17 00:00:00 2001
From: Danny Robson
Date: Fri, 16 Mar 2018 11:10:44 +1100
Subject: [PATCH] coord/simd: add initial simd tests

---
 CMakeLists.txt      |   3 +
 coord/simd.hpp      |  26 ++++
 coord/simd_sse.hpp  | 262 ++++++++++++++++++++++++++++++++++++++++++++
 test/coord/simd.cpp |  37 ++++++
 4 files changed, 328 insertions(+)
 create mode 100644 coord/simd.hpp
 create mode 100644 coord/simd_sse.hpp
 create mode 100644 test/coord/simd.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5624bc43..c95d7cc7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,6 +193,8 @@ list (
     coord/init.hpp
     coord/iostream.hpp
     coord/ops.hpp
+    coord/simd.hpp
+    coord/simd_sse.hpp
     coord/store.hpp
     coord/traits.hpp
     debug.cpp
@@ -466,6 +468,7 @@ if (TESTS)
         colour
         comparator
         coord
+        coord/simd
         encode/base
         endian
         exe
diff --git a/coord/simd.hpp b/coord/simd.hpp
new file mode 100644
index 00000000..e3ae38dd
--- /dev/null
+++ b/coord/simd.hpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2018 Danny Robson
+ */
+
+#ifndef CRUFT_UTIL_COORD_SIMD_HPP
+#define CRUFT_UTIL_COORD_SIMD_HPP
+
+#ifdef __SSE__
+#include "simd_sse.hpp"
+#else
+#error "Unsupported SIMD architecture"
+#endif
+
+#endif
diff --git a/coord/simd_sse.hpp b/coord/simd_sse.hpp
new file mode 100644
index 00000000..a544ce60
--- /dev/null
+++ b/coord/simd_sse.hpp
@@ -0,0 +1,262 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2018 Danny Robson
+ */
+
+
+#ifndef CRUFT_UTIL_COORD_SIMD_SSE_HPP
+#define CRUFT_UTIL_COORD_SIMD_SSE_HPP
+
+#ifndef __SSE2__
+#error "SSE2 is required"
+#endif
+
+// NOTE(review): the header names on the original include lines were lost;
+// these three cover every intrinsic used below. confirm against upstream.
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <immintrin.h>
+
+namespace util::coord {
+    ///////////////////////////////////////////////////////////////////////////
+    namespace detail {
+
+    }
+
+    // alignment, in bytes, requested for `simd` objects
+    constexpr int alignment = 16;
+
+    // thin wrapper around a single __m128 holding four packed floats.
+    //
+    // the comparison operators below return per-lane bitmasks (as produced
+    // by the _mm_cmp*_ps intrinsics) rather than a bool.
+    struct alignas (alignment) simd {
+        ///////////////////////////////////////////////////////////////////////
+        // `a` is stored in lane 0 and `d` in lane 3
+        simd (float a, float b, float c, float d):
+            data (_mm_setr_ps (a, b, c, d))
+        { ; }
+
+
+        //---------------------------------------------------------------------
+        // broadcast `v` to every lane. left implicit so that scalar results
+        // can be passed straight to functions taking a simd.
+        simd (float v):
+            data (_mm_set_ps1 (v))
+        { ; }
+
+
+        //---------------------------------------------------------------------
+        simd (__m128 _data):
+            data (_data)
+        { ; }
+
+
+        //---------------------------------------------------------------------
+        operator       __m128& ()       { return data; }
+        operator const __m128& () const { return data; }
+
+        // true iff `all` holds for this value
+        explicit operator bool () const;
+
+        // NOTE(review): subscripting __m128 is a GCC/clang vector extension
+        float operator[] (int idx) const { return data[idx]; }
+
+
+        ///////////////////////////////////////////////////////////////////////
+        __m128 data;
+    };
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // every free function in this header is `inline` so that including it
+    // from more than one translation unit does not violate the ODR.
+    inline simd operator+ (simd a, simd b) { return _mm_add_ps (a, b); }
+    inline simd operator- (simd a, simd b) { return _mm_sub_ps (a, b); }
+    inline simd operator/ (simd a, simd b) { return _mm_div_ps (a, b); }
+    inline simd operator* (simd a, simd b) { return _mm_mul_ps (a, b); }
+
+
+    //-------------------------------------------------------------------------
+    // lane-wise comparisons; each lane is all-ones where the predicate holds
+    // and all-zeroes where it does not.
+    inline simd operator<  (simd a, simd b) { return _mm_cmplt_ps (a, b); }
+    inline simd operator<= (simd a, simd b) { return _mm_cmple_ps (a, b); }
+    inline simd operator>  (simd a, simd b) { return _mm_cmpgt_ps (a, b); }
+    inline simd operator>= (simd a, simd b) { return _mm_cmpge_ps (a, b); }
+    inline simd operator== (simd a, simd b) { return _mm_cmpeq_ps (a, b); }
+
+
+    //-------------------------------------------------------------------------
+    inline simd operator| (simd a, simd b) { return _mm_or_ps  (a, b); }
+    inline simd operator& (simd a, simd b) { return _mm_and_ps (a, b); }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // returns the lanes of `a` where `mask` is set, and of `b` elsewhere.
+    //
+    // `mask` should be a comparison result (all-ones or all-zeroes per lane):
+    // blendv tests only the top bit of each lane, the fallback every bit.
+    inline simd
+    select (simd mask, simd a, simd b)
+    {
+#if defined(__SSE4_1__)
+        // blendv takes its second operand where the mask is set, so `a`
+        // must be passed second to agree with the fallback below.
+        return _mm_blendv_ps (b, a, mask);
+#else
+        return _mm_or_ps (
+            _mm_and_ps    (mask, a),
+            _mm_andnot_ps (mask, b)
+        );
+#endif
+    }
+
+
+    //-------------------------------------------------------------------------
+    // true iff the sign bit of every lane is set
+    inline bool
+    all (simd val)
+    {
+        return _mm_movemask_ps (val) == 0b1111;
+    }
+
+
+    //-------------------------------------------------------------------------
+    // lane-wise restriction of `val` to the range [lo, hi]
+    inline simd
+    clamp (simd val, simd lo, simd hi)
+    {
+        return _mm_min_ps (_mm_max_ps (val, lo), hi);
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // use the same comparator in both because we're likely to use min
+    // and max near each other and the mask might be sharable this way.
+    inline simd min (simd a, simd b) { return select (a < b, a, b); }
+    inline simd max (simd a, simd b) { return select (a < b, b, a); }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // horizontal sum; the total is returned in every lane on both paths.
+#if defined (__SSE3__)
+    inline simd
+    sum (simd a)
+    {
+        auto part = _mm_hadd_ps (a, a);
+        return _mm_hadd_ps (part, part);
+    }
+#else
+    inline simd
+    sum (simd vals)
+    {
+        // swap adjacent lanes and add, giving the pair sums
+        // lane: 3   2   1   0
+        // vals: v3  v2  v1  v0
+        // shuf: v2  v3  v0  v1
+        // sums: 2+3 2+3 0+1 0+1
+        __m128 shuf = _mm_shuffle_ps (vals, vals, _MM_SHUFFLE (2, 3, 0, 1));
+        __m128 sums = _mm_add_ps (vals, shuf);
+
+        // swap the two halves and add, leaving the total in every lane
+        // shuf: 0+1 0+1 2+3 2+3
+        shuf = _mm_shuffle_ps (sums, sums, _MM_SHUFFLE (1, 0, 3, 2));
+        return _mm_add_ps (sums, shuf);
+    }
+#endif
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // four component dot product; the result is returned in every lane.
+    inline simd
+    dot (simd a, simd b)
+    {
+#if defined(__SSE4_1__)
+        // 0xff: multiply all four lanes, write the sum to all four lanes
+        return _mm_dp_ps (a, b, 0xff);
+#else
+        return sum (a * b);
+#endif
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // lane-wise square root, and approximate reciprocal square root
+    inline simd sqrt  (simd a) { return _mm_sqrt_ps  (a); }
+    inline simd rsqrt (simd a) { return _mm_rsqrt_ps (a); }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // squared euclidean length, in every lane
+    inline simd
+    norm2 (simd a)
+    {
+        return dot (a, a);
+    }
+
+
+    //-------------------------------------------------------------------------
+    // euclidean length, in every lane
+    inline simd
+    norm (simd a)
+    {
+        return sqrt (norm2 (a));
+    }
+
+
+    //-------------------------------------------------------------------------
+    // `a` scaled by the reciprocal of its length.
+    //
+    // rsqrt must be given the squared length: rsqrt (norm (a)) would scale
+    // by the inverse fourth root of dot (a, a) instead. the result carries
+    // the reduced precision of _mm_rsqrt_ps.
+    inline simd
+    normalised (simd a)
+    {
+        return a * rsqrt (norm2 (a));
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // lane-wise absolute value, computed by clearing each sign bit
+    inline simd
+    abs (simd a)
+    {
+        auto bffff = _mm_set1_epi32 (-1);
+        auto b7fff = _mm_srli_epi32 (bffff, 1);
+        auto mask  = _mm_castsi128_ps (b7fff);
+
+        return _mm_and_ps (mask, a);
+    }
+
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // square root of the sum of the squared lanes, in every lane
+    inline simd
+    hypot (simd a)
+    {
+        return sqrt (sum (a * a));
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    inline simd::operator bool() const
+    {
+        return all (data);
+    }
+}
+
+#endif
diff --git a/test/coord/simd.cpp b/test/coord/simd.cpp
new file mode 100644
index 00000000..99a3b120
--- /dev/null
+++ b/test/coord/simd.cpp
@@ -0,0 +1,37 @@
+#include "coord/simd.hpp"
+#include "tap.hpp"
+
+#include <cmath>
+
+
+int
+main ()
+{
+    util::TAP::logger tap;
+
+    {
+        const util::coord::simd a (1,2,3,4);
+        const util::coord::simd b (4,1,3,2);
+        // dot broadcasts its result to every lane; read it from lane 0
+        const float res = dot (a, b)[0];
+        tap.expect_eq (res, 4+2+9+8, "trivial dot product");
+    }
+
+    {
+        const util::coord::simd a (1, 2, 3, 4);
+        const util::coord::simd b (0, 3, 3, 9);
+
+        const auto lo = min (a, b);
+        const auto hi = max (a, b);
+
+        tap.expect_eq (lo, util::coord::simd {0,2,3,4}, "vector minimum");
+        tap.expect_eq (hi, util::coord::simd {1,3,3,9}, "vector maximum");
+    }
+
+    {
+        const util::coord::simd val { -INFINITY, INFINITY, 0, -9 };
+        tap.expect_eq (abs (val), util::coord::simd {INFINITY,INFINITY,0,9}, "absolute value");
+    }
+
+    return tap.status ();
+}
\ No newline at end of file