coord/simd: add initial simd tests
This commit is contained in:
parent
af5da5b846
commit
b307ab935d
@ -193,6 +193,8 @@ list (
|
||||
coord/init.hpp
|
||||
coord/iostream.hpp
|
||||
coord/ops.hpp
|
||||
coord/simd.hpp
|
||||
coord/simd_sse.hpp
|
||||
coord/store.hpp
|
||||
coord/traits.hpp
|
||||
debug.cpp
|
||||
@ -466,6 +468,7 @@ if (TESTS)
|
||||
colour
|
||||
comparator
|
||||
coord
|
||||
coord/simd
|
||||
encode/base
|
||||
endian
|
||||
exe
|
||||
|
26
coord/simd.hpp
Normal file
26
coord/simd.hpp
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
||||
*/
|
||||
|
||||
#ifndef CRUFT_UTIL_COORD_SIMD_HPP
|
||||
#define CRUFT_UTIL_COORD_SIMD_HPP
|
||||
|
||||
#ifdef __SSE__
|
||||
#include "simd_sse.hpp"
|
||||
#else
|
||||
#error "Unsupported SIMD architecture"
|
||||
#endif
|
||||
|
||||
#endif
|
247
coord/simd_sse.hpp
Normal file
247
coord/simd_sse.hpp
Normal file
@ -0,0 +1,247 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CRUFT_UTIL_COORD_SIMD_SSE_HPP
|
||||
#define CRUFT_UTIL_COORD_SIMD_SSE_HPP
|
||||
|
||||
#ifndef __SSE2__
|
||||
#error "SSE2 is required"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <pmmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace util::coord {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
namespace detail {
|
||||
|
||||
}
|
||||
|
||||
/// Byte alignment of a full 128-bit SSE register; `simd` is aligned to this
/// value so that aligned loads and stores of its storage are valid.
inline constexpr int alignment = 16;


/// A value wrapper around one `__m128` register holding four packed
/// single-precision floats.
///
/// The wrapper converts implicitly to and from `__m128`, so instances can be
/// handed straight to SSE intrinsics and intrinsic results can be returned
/// as `simd`.
struct alignas (alignment) simd {
    ///////////////////////////////////////////////////////////////////////
    /// Builds a vector whose lanes 0..3 hold `a`, `b`, `c`, `d` in that
    /// order (`_mm_setr_ps` places its first argument in the lowest lane).
    simd (float a, float b, float c, float d):
        data (_mm_setr_ps (a, b, c, d))
    { ; }


    //---------------------------------------------------------------------
    /// Broadcasts `v` into all four lanes. Implicit, so a scalar can be
    /// supplied wherever a `simd` is expected.
    simd (float v):
        data (_mm_set_ps1 (v))
    { ; }


    //---------------------------------------------------------------------
    /// Wraps an existing register. Implicit, so intrinsic results can be
    /// returned directly from functions declared to return `simd`.
    simd (__m128 _data):
        data (_data)
    { ; }


    //---------------------------------------------------------------------
    /// Access to the underlying register for use with raw intrinsics.
    operator       __m128& ()       { return data; }
    operator const __m128& () const { return data; }

    /// Declared here and defined out of line later in this header, in
    /// terms of `all`.
    explicit operator bool () const;

    /// Returns the value held in lane `idx`. `idx` must be in [0, 3]; the
    /// index is not range checked.
    float operator[] (int idx) const
    {
        // Spill through an aligned buffer instead of subscripting the
        // register: indexing `__m128` directly is a compiler vector
        // extension rather than part of the intrinsics API.
        alignas (alignment) float lanes[4];
        _mm_store_ps (lanes, data);
        return lanes[idx];
    }


    ///////////////////////////////////////////////////////////////////////
    __m128 data;
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Lane-wise arithmetic on the four packed floats of each operand.
///
/// Marked `inline` because these are defined in a header: without it, every
/// translation unit including this file would emit its own external
/// definition and the program would fail to link.
inline simd operator+ (simd a, simd b) { return _mm_add_ps (a, b); }
inline simd operator- (simd a, simd b) { return _mm_sub_ps (a, b); }
inline simd operator/ (simd a, simd b) { return _mm_div_ps (a, b); }
inline simd operator* (simd a, simd b) { return _mm_mul_ps (a, b); }
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
/// Lane-wise comparisons.
///
/// These do not return `bool`: the result is a mask vector produced by the
/// SSE compare intrinsics, where each lane has all bits set when the
/// comparison holds for that lane and all bits clear otherwise. Feed the
/// result to `select`, `all`, or the bitwise operators.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd operator<  (simd a, simd b) { return _mm_cmplt_ps (a, b); }
inline simd operator<= (simd a, simd b) { return _mm_cmple_ps (a, b); }
inline simd operator>  (simd a, simd b) { return _mm_cmpgt_ps (a, b); }
inline simd operator>= (simd a, simd b) { return _mm_cmpge_ps (a, b); }
inline simd operator== (simd a, simd b) { return _mm_cmpeq_ps (a, b); }
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
/// Bitwise OR / AND over the raw 128 bits of the operands; intended for
/// combining the mask vectors produced by the comparison operators.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd operator| (simd a, simd b) { return _mm_or_ps  (a, b); }
inline simd operator& (simd a, simd b) { return _mm_and_ps (a, b); }
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Lane-wise choice between two vectors.
///
/// For each lane the result holds the lane of `a` where `mask` is set and
/// the lane of `b` where it is clear. `mask` is expected to be a comparison
/// result (each lane all-ones or all-zeros): the SSE4.1 path inspects only
/// the sign bit of each lane while the fallback uses every bit, so the two
/// paths agree only for such masks.
inline simd
select (simd mask, simd a, simd b)
{
#if defined(__SSE4_1__)
    // blendv yields its *second* operand where the mask's sign bit is set,
    // so `a` must be passed second to match the fallback below.
    return _mm_blendv_ps (b, a, mask);
#else
    return _mm_or_ps (
        _mm_and_ps    (mask, a),
        _mm_andnot_ps (mask, b)
    );
#endif
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
bool
|
||||
all (simd val)
|
||||
{
|
||||
return _mm_movemask_ps (val) == 0b1111;
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
auto
|
||||
clamp (simd val, simd lo, simd hi)
|
||||
{
|
||||
auto lo_mask = val > lo;
|
||||
auto hi_mask = val < hi;
|
||||
|
||||
auto res = (lo_mask & val)
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// use the same comparator in both because we're likely to use min
|
||||
// and max near each other and the mask might be sharable this way.
|
||||
simd min (simd a, simd b) { return select (a < b, a, b); }
|
||||
simd max (simd a, simd b) { return select (a < b, b, a); }
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
#if defined (__SSE3__)
|
||||
simd
|
||||
sum (simd a)
|
||||
{
|
||||
auto part = _mm_hadd_ps (a, a);
|
||||
return _mm_hadd_ps (part, part);
|
||||
}
|
||||
#else
|
||||
auto
|
||||
sum (simd vals)
|
||||
{
|
||||
// swap pairs of components
|
||||
// vals: 3 2 1 0
|
||||
// shuf: 2 3 0 1
|
||||
auto shuf = _mm_shuffle_ps (vals, vals, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
|
||||
// combine the pairs
|
||||
auto sums = _mm_add_ps (vals, shuf);
|
||||
|
||||
// copy the lower components of sums up, then add with the original sums
|
||||
// sums: 2+3 2+3 1+0 1+0
|
||||
// shuf: xxx xxx 2+3 2+3
|
||||
shuf = _mm_movehl_ps (shuf, sums);
|
||||
sums = _mm_add_ss (sums, shuf);
|
||||
|
||||
// sums: xxx xxx 0123 1234
|
||||
return _mm_cvtss_f32 (sums);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
#if defined(__SSE4_1__)
|
||||
simd
|
||||
dot (simd a, simd b)
|
||||
{
|
||||
return _mm_dp_ps (a, b, 0xff);
|
||||
}
|
||||
#elif defined(__SSE3__)
|
||||
simd
|
||||
dot (simd a, simd b)
|
||||
{
|
||||
return sum (a * b)
|
||||
}
|
||||
#else
|
||||
auto
|
||||
dot (simd a, simd b)
|
||||
{
|
||||
auto mul = a * b;
|
||||
return sum (mul);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Lane-wise square root, and lane-wise reciprocal square root.
///
/// `rsqrt` maps to `_mm_rsqrt_ps`, which is a fast hardware approximation
/// rather than a correctly rounded `1 / sqrt (x)`.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd sqrt  (simd a) { return _mm_sqrt_ps  (a); }
inline simd rsqrt (simd a) { return _mm_rsqrt_ps (a); }
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Squared Euclidean length of `a`, i.e. `dot (a, a)`, returned as a `simd`.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd
norm2 (simd a)
{
    return dot (a, a);
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
/// Euclidean length of `a`: the square root of `norm2 (a)`.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd
norm (simd a)
{
    return sqrt (norm2 (a));
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
/// Returns `a` scaled to (approximately) unit length.
///
/// The scale factor is the reciprocal square root of the *squared* length,
/// i.e. 1 / |a|. Taking `rsqrt` of the length itself would instead scale by
/// 1 / sqrt (|a|). The result is only as accurate as `rsqrt`, and a
/// zero-length input is not special-cased.
inline simd
normalised (simd a)
{
    return a * rsqrt (norm2 (a));
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
auto
|
||||
abs (simd a)
|
||||
{
|
||||
auto bffff = _mm_set1_epi32 (-1);
|
||||
auto b7fff = _mm_srli_epi32 (bffff, 1);
|
||||
auto mask = _mm_castsi128_ps (b7fff);
|
||||
|
||||
return _mm_and_ps (mask, a);
|
||||
}
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Square root of the sum of the squares of the four lanes of `a`.
///
/// Marked `inline` so the header can be included from more than one
/// translation unit.
inline simd
hypot (simd a)
{
    return sqrt (sum (a * a));
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/// Contextual conversion to bool: true when `all` reports that every lane
/// of the wrapped register is set.
///
/// An out-of-class member definition in a header must be `inline`,
/// otherwise each including translation unit emits a duplicate definition.
inline simd::operator bool() const
{
    return all (data);
}
|
||||
}
|
||||
|
||||
#endif
|
34
test/coord/simd.cpp
Normal file
34
test/coord/simd.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include "coord/simd.hpp"
#include "tap.hpp"

#include <limits>
|
||||
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
util::TAP::logger tap;
|
||||
|
||||
{
|
||||
const util::coord::simd a (1,2,3,4);
|
||||
const util::coord::simd b (4,1,3,2);
|
||||
const float res = dot (a, b);
|
||||
tap.expect_eq (res, 4+2+9+8, "trivial dot product");
|
||||
}
|
||||
|
||||
{
|
||||
const util::coord::simd a (1, 2, 3, 4);
|
||||
const util::coord::simd b (0, 3, 3, 9);
|
||||
|
||||
const auto lo = min (a, b);
|
||||
const auto hi = max (a, b);
|
||||
|
||||
tap.expect_eq (lo, util::coord::simd {0,2,3,4}, "vector minimum");
|
||||
tap.expect_eq (hi, util::coord::simd {1,3,3,9}, "vector maximum");
|
||||
}
|
||||
|
||||
{
|
||||
const util::coord::simd val { -INFINITY, INFINITY, 0, -9 };
|
||||
tap.expect_eq (abs (val), util::coord::simd {INFINITY,INFINITY,0,9}, "absolute value");
|
||||
}
|
||||
|
||||
return tap.status ();
|
||||
}
|
Loading…
Reference in New Issue
Block a user