libcruft-util/coord/simd_sse.hpp

406 lines
11 KiB
C++
Raw Normal View History

2018-03-16 11:10:44 +11:00
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
*/
#ifndef CRUFT_UTIL_COORD_SIMD_SSE_HPP
#define CRUFT_UTIL_COORD_SIMD_SSE_HPP
2018-03-20 13:30:05 +11:00
#ifndef __SSE3__
#error "SSE3 is required"
2018-03-16 11:10:44 +11:00
#endif
#include <xmmintrin.h>
#include <pmmintrin.h>
#include <immintrin.h>
#include <array>
2018-03-22 16:10:06 +11:00
#include <iosfwd>
2018-03-16 11:10:44 +11:00
namespace util::coord {
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// NOTE(review): presumably the byte alignment wanted for the 128-bit SSE
// register types wrapped in this header -- confirm against callers.
constexpr int alignment { 16 };
2018-03-16 11:10:44 +11:00
2018-03-23 17:52:08 +11:00
// Maps an element count and value type onto the SSE register type listed
// for that combination. The primary template is intentionally empty, so any
// combination without a specialisation below has no nested `type`.
template <size_t CountV, typename ValueT>
struct native_type { };

// single precision floats: one to four lanes share __m128
template <>
struct native_type<1,float> {
    using type = __m128;
};

template <>
struct native_type<2,float> {
    using type = __m128;
};

template <>
struct native_type<3,float> {
    using type = __m128;
};

template <>
struct native_type<4,float> {
    using type = __m128;
};

// double precision floats: one or two lanes share __m128d
template <>
struct native_type<1,double> {
    using type = __m128d;
};

template <>
struct native_type<2,double> {
    using type = __m128d;
};

// 32 bit unsigned integers: one to four lanes share __m128i
template <>
struct native_type<1,uint32_t> {
    using type = __m128i;
};

template <>
struct native_type<2,uint32_t> {
    using type = __m128i;
};

template <>
struct native_type<3,uint32_t> {
    using type = __m128i;
};

template <>
struct native_type<4,uint32_t> {
    using type = __m128i;
};
2018-03-16 11:10:44 +11:00
2018-03-23 17:52:08 +11:00
// A 128-bit SSE register wrapped as a small vector type.
//
// NOTE(review): storage is always `__m128` and every intrinsic used here is
// the packed-float (`_ps`/`_ss`) variant, so only ValueT = float looks
// meaningful at present; CountV is not consulted by any member -- confirm
// before instantiating with other parameters.
template <size_t CountV, typename ValueT>
struct alignas (16) simd {
    ///////////////////////////////////////////////////////////////////////
    // Build from four explicit lane values; `a` is stored in lane 0 and
    // `d` in lane 3.
    simd (ValueT a, ValueT b, ValueT c, ValueT d):
        data (_mm_setr_ps (a, b, c, d))
    { ; }


    //---------------------------------------------------------------------
    // Broadcast one value into all four lanes.
    simd (ValueT v):
        data (_mm_set_ps1 (v))
    { ; }


    //---------------------------------------------------------------------
    // Wrap an existing register. This is left implicit because the free
    // functions in this header return raw intrinsic results and rely on
    // the conversion.
    simd (__m128 _data):
        data (_data)
    { ; }


    //---------------------------------------------------------------------
    // Explicit access to the underlying register.
    explicit operator __m128& () { return data; }
    explicit operator const __m128& () const { return data; }

    // Declared here; the definition is supplied out of line.
    explicit operator bool () const;

    // Read lane `idx`. No bounds checking is performed. This relies on the
    // compiler permitting subscripting of `__m128` (a GCC/Clang vector
    // extension).
    ValueT operator[] (int idx) const { return data[idx]; }


    ///////////////////////////////////////////////////////////////////////
    // Proxy giving named access (x, y, z, w) to lane `IndexV` of the
    // register it overlays inside the union below.
    template <size_t IndexV>
    struct accessor {
        // Read the lane: broadcast lane IndexV to every position, then
        // extract the low float.
        //
        // The previous SSE4.1-only path called a misspelt *integer*
        // extract intrinsic on a float register and could not compile;
        // the shuffle form is valid on every SSE level so it is now used
        // unconditionally.
        operator ValueT () const noexcept
        {
            return _mm_cvtss_f32 (
                _mm_shuffle_ps (
                    data,
                    data,
                    _MM_SHUFFLE (IndexV, IndexV, IndexV, IndexV)
                )
            );
        }

        // Declared only; no definition appears in this header.
        accessor& operator= (ValueT);

        __m128 data;
    };


    // All members overlay the same 16 bytes: `data` is the whole register
    // and each accessor exposes a single lane of it.
    union {
        __m128 data;
        accessor<0> x;
        accessor<1> y;
        accessor<2> z;
        accessor<3> w;
    };
};
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise addition of two vectors.
template <size_t S, typename T>
simd<S,T>
operator+ (simd<S,T> a, simd<S,T> b)
{
    const __m128 total = _mm_add_ps (a.data, b.data);
    return total;
}
2018-03-16 11:10:44 +11:00
//-------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
// Lane-wise subtraction: each lane of `b` is taken away from the matching
// lane of `a`.
template <size_t S, typename T>
simd<S,T>
operator- (simd<S,T> a, simd<S,T> b)
{
    const __m128 difference = _mm_sub_ps (a.data, b.data);
    return difference;
}
//-------------------------------------------------------------------------
// Lane-wise division: each lane of `a` is divided by the matching lane
// of `b`.
template <size_t S, typename T>
simd<S,T>
operator/ (simd<S,T> a, simd<S,T> b)
{
    const __m128 quotient = _mm_div_ps (a.data, b.data);
    return quotient;
}
//-------------------------------------------------------------------------
// Lane-wise multiplication of two vectors.
template <size_t S, typename T>
simd<S,T>
operator* (simd<S,T> a, simd<S,T> b)
{
    const __m128 product = _mm_mul_ps (a.data, b.data);
    return product;
}
///////////////////////////////////////////////////////////////////////////
2018-03-20 13:30:05 +11:00
// computes a*b + c
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
2018-03-20 13:30:05 +11:00
auto
2018-03-23 17:52:08 +11:00
fma (simd<S,T> a, simd<S,T> b, simd<S,T> c)
2018-03-20 13:30:05 +11:00
{
#if defined(__FMA__)
return _mm_fmadd_ps (a.data, b.data, c.data);
#else
return a * b + c;
#endif
}
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise `a < b`. The result is a mask, not a boolean: lanes where the
// comparison holds have every bit set, the remaining lanes are zero.
template <size_t S, typename T>
simd<S,T>
operator< (simd<S,T> a, simd<S,T> b)
{
    const __m128 mask = _mm_cmplt_ps (a.data, b.data);
    return mask;
}
2018-03-16 11:10:44 +11:00
2018-03-23 17:52:08 +11:00
// Lane-wise `a <= b`, returned as an all-ones / all-zeros mask per lane.
template <size_t S, typename T>
simd<S,T>
operator<= (simd<S,T> a, simd<S,T> b)
{
    const __m128 mask = _mm_cmple_ps (a.data, b.data);
    return mask;
}
// Lane-wise `a > b`, returned as an all-ones / all-zeros mask per lane.
template <size_t S, typename T>
simd<S,T>
operator> (simd<S,T> a, simd<S,T> b)
{
    const __m128 mask = _mm_cmpgt_ps (a.data, b.data);
    return mask;
}
// Lane-wise `a >= b`, returned as an all-ones / all-zeros mask per lane.
template <size_t S, typename T>
simd<S,T>
operator>= (simd<S,T> a, simd<S,T> b)
{
    const __m128 mask = _mm_cmpge_ps (a.data, b.data);
    return mask;
}
// Lane-wise `a == b`, returned as an all-ones / all-zeros mask per lane.
// Use `all` or `any` to reduce the mask to a single bool.
template <size_t S, typename T>
simd<S,T>
operator== (simd<S,T> a, simd<S,T> b)
{
    const __m128 mask = _mm_cmpeq_ps (a.data, b.data);
    return mask;
}
///////////////////////////////////////////////////////////////////////////
// Bitwise OR of the two registers.
template <size_t S, typename T>
simd<S,T>
operator| (simd<S,T> a, simd<S,T> b)
{
    const __m128 combined = _mm_or_ps (a.data, b.data);
    return combined;
}
// Logical-OR spelling of the bitwise OR; performs exactly the same
// operation as `operator|`. Being an overloaded operator, both operands are
// always evaluated (there is no short-circuiting).
template <size_t S, typename T>
simd<S,T>
operator|| (simd<S,T> a, simd<S,T> b)
{
    const __m128 combined = _mm_or_ps (a.data, b.data);
    return combined;
}
// Bitwise AND of the two registers.
template <size_t S, typename T>
simd<S,T>
operator& (simd<S,T> a, simd<S,T> b)
{
    const __m128 common = _mm_and_ps (a.data, b.data);
    return common;
}
// Logical-AND spelling of the bitwise AND; performs exactly the same
// operation as `operator&`. Being an overloaded operator, both operands are
// always evaluated (there is no short-circuiting).
template <size_t S, typename T>
simd<S,T>
operator&& (simd<S,T> a, simd<S,T> b)
{
    const __m128 common = _mm_and_ps (a.data, b.data);
    return common;
}
2018-03-16 11:10:44 +11:00
2018-03-20 13:30:05 +11:00
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise floor: the largest integral value not greater than each lane.
//
// With SSE4.1 this is a single instruction. Otherwise it is emulated with
// an int32 round-trip followed by a correction step.
template <size_t S, typename T>
simd<S,T>
floor (simd<S,T> val)
{
#if defined(__SSE4_1__)
    return _mm_floor_ps (val.data);
#else
    // Convert to int32 and back. The conversion follows the current MXCSR
    // rounding mode (it is NOT a truncation), so for a non-integral input
    // it may land on either neighbouring integer.
    const __m128 rounded = _mm_cvtepi32_ps (_mm_cvtps_epi32 (val.data));

    // Wherever it landed above the input, step down by one. This makes
    // the result independent of which rounding mode is active.
    const __m128 overshot = _mm_cmpgt_ps (rounded, val.data);
    const __m128 lowered  = _mm_sub_ps (
        rounded,
        _mm_and_ps (overshot, _mm_set1_ps (1.f))
    );

    // Lanes with |val| >= 2^23 carry no fractional bits, and may not fit
    // in an int32 at all (the conversion then yields 0x80000000). NaN
    // fails the comparison as well. All of those lanes are passed through
    // unchanged instead of using the round-trip result.
    const __m128 magnitude = _mm_and_ps (
        val.data,
        _mm_castsi128_ps (_mm_set1_epi32 (0x7fffffff))
    );
    const __m128 has_fraction = _mm_cmplt_ps (magnitude, _mm_set1_ps (8388608.f));

    // NOTE: a zero result is always +0.0 on this path, so floor(-0.0)
    // differs in sign from the SSE4.1 instruction.
    return _mm_or_ps (
        _mm_and_ps    (has_fraction, lowered),
        _mm_andnot_ps (has_fraction, val.data)
    );
#endif
}
//---------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
// Lane-wise ceil: the smallest integral value not less than each lane.
//
// With SSE4.1 this is a single instruction. Otherwise it is emulated with
// an int32 round-trip followed by a correction step.
template <size_t S, typename T>
simd<S,T>
ceil (simd<S,T> val)
{
#if defined(__SSE4_1__)
    return _mm_ceil_ps (val.data);
#else
    // Convert to int32 and back. The conversion follows the current MXCSR
    // rounding mode (it is NOT a truncation), so for a non-integral input
    // it may land on either neighbouring integer.
    const __m128 rounded = _mm_cvtepi32_ps (_mm_cvtps_epi32 (val.data));

    // Wherever it landed below the input, step up by one. This makes the
    // result independent of which rounding mode is active.
    const __m128 undershot = _mm_cmplt_ps (rounded, val.data);
    const __m128 raised    = _mm_add_ps (
        rounded,
        _mm_and_ps (undershot, _mm_set1_ps (1.f))
    );

    // Lanes with |val| >= 2^23 carry no fractional bits, and may not fit
    // in an int32 at all (the conversion then yields 0x80000000). NaN
    // fails the comparison as well. All of those lanes are passed through
    // unchanged instead of using the round-trip result.
    const __m128 magnitude = _mm_and_ps (
        val.data,
        _mm_castsi128_ps (_mm_set1_epi32 (0x7fffffff))
    );
    const __m128 has_fraction = _mm_cmplt_ps (magnitude, _mm_set1_ps (8388608.f));

    // NOTE: a zero result is always +0.0 on this path, so inputs in
    // (-1, 0] differ in the sign of zero from the SSE4.1 instruction.
    return _mm_or_ps (
        _mm_and_ps    (has_fraction, raised),
        _mm_andnot_ps (has_fraction, val.data)
    );
#endif
}
2018-03-16 11:10:44 +11:00
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise select: where `mask` is set the result takes the lane from `a`,
// elsewhere it takes the lane from `b`.
//
// `mask` is expected to be a comparison result (every bit of a lane set or
// clear). The SSE4.1 path inspects only the sign bit of each mask lane,
// while the fallback uses every bit, so partially-set masks give different
// results on the two paths.
template <size_t S, typename T>
simd<S,T>
select (simd<S,T> mask, simd<S,T> a, simd<S,T> b)
{
#if defined(__SSE4_1__)
    // blendv returns its SECOND operand where the mask is set, so `a` must
    // be passed second to agree with the fallback below. The raw registers
    // are passed explicitly because simd's conversion to __m128 is
    // `explicit`.
    return _mm_blendv_ps (b.data, a.data, mask.data);
#else
    return _mm_or_ps (
        _mm_and_ps    (mask.data, a.data),
        _mm_andnot_ps (mask.data, b.data)
    );
#endif
}
//-------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
2018-03-16 11:10:44 +11:00
bool
2018-03-23 17:52:08 +11:00
all (simd<S,T> val)
2018-03-16 11:10:44 +11:00
{
2018-03-20 13:30:05 +11:00
return _mm_movemask_ps (val.data) == 0b1111;
2018-03-16 11:10:44 +11:00
}
//-------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
2018-03-20 13:30:05 +11:00
bool
2018-03-23 17:52:08 +11:00
any (simd<S,T> val)
2018-03-16 11:10:44 +11:00
{
2018-03-20 13:30:05 +11:00
return _mm_movemask_ps (val.data);
2018-03-16 11:10:44 +11:00
}
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise minimum of two vectors.
template <size_t S, typename T>
simd<S,T>
min (simd<S,T> a, simd<S,T> b)
{
    const __m128 lowest = _mm_min_ps (a.data, b.data);
    return lowest;
}
2018-03-20 13:30:05 +11:00
2018-03-23 17:52:08 +11:00
// Lane-wise maximum of two vectors.
template <size_t S, typename T>
simd<S,T>
max (simd<S,T> a, simd<S,T> b)
{
    const __m128 highest = _mm_max_ps (a.data, b.data);
    return highest;
}
// Lane-wise clamp of `val`: each lane is first raised to at least the
// matching lane of `lo`, then lowered to at most the matching lane of `hi`.
template <size_t S, typename T>
simd<S,T>
clamp (simd<S,T> val, simd<S,T> lo, simd<S,T> hi)
{
    const simd<S,T> raised = max (val, lo);
    return min (raised, hi);
}
2018-03-16 11:10:44 +11:00
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Horizontal sum: adds the four lanes of `a` together. The total is
// returned in every lane of the result.
template <size_t S, typename T>
simd<S,T>
sum (simd<S,T> a)
{
    // first pass leaves the pair sums (a0+a1, a2+a3) repeated twice
    const __m128 pairs = _mm_hadd_ps (a.data, a.data);

    // second pass adds the two pair sums together in each lane
    const __m128 total = _mm_hadd_ps (pairs, pairs);
    return total;
}
///////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
simd
dot (simd a, simd b)
{
return _mm_dp_ps (a, b, 0xff);
}
2018-03-20 13:30:05 +11:00
#else
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
simd<S,T>
dot (simd<S,T> a, simd<S,T> b)
2018-03-16 11:10:44 +11:00
{
2018-03-20 13:30:05 +11:00
return sum (a * b);
2018-03-16 11:10:44 +11:00
}
#endif
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise square root.
template <size_t S, typename T>
simd<S,T>
sqrt (simd<S,T> a)
{
    const __m128 roots = _mm_sqrt_ps (a.data);
    return roots;
}
// Lane-wise reciprocal square root, 1/sqrt(a). This uses the hardware
// estimate instruction, so the result is an approximation.
template <size_t S, typename T>
simd<S,T>
rsqrt (simd<S,T> a)
{
    const __m128 estimate = _mm_rsqrt_ps (a.data);
    return estimate;
}
2018-03-16 11:10:44 +11:00
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
auto
norm2 (simd<S,T> a)
2018-03-16 11:10:44 +11:00
{
return dot (a, a);
}
//-------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
auto
norm (simd<S,T> a)
2018-03-16 11:10:44 +11:00
{
return sqrt (norm2 (a));
}
//-------------------------------------------------------------------------
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
auto
normalised (simd<S,T> a)
2018-03-16 11:10:44 +11:00
{
return a * rsqrt (norm (a));
}
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Lane-wise absolute value, obtained by clearing the sign bit of each lane.
template <size_t S, typename T>
simd<S,T>
abs (simd<S,T> a)
{
    // 0x7fffffff keeps every bit of a 32 bit lane except the top (sign) bit
    const __m128 magnitude_bits = _mm_castsi128_ps (_mm_set1_epi32 (0x7fffffff));
    return _mm_and_ps (magnitude_bits, a.data);
}
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
template <size_t S, typename T>
auto
hypot (simd<S,T> a)
2018-03-16 11:10:44 +11:00
{
return sqrt (sum (a * a));
}
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// A vector converts to true only when `all` reports true for it.
template <size_t S, typename T>
simd<S,T>::operator bool () const
{
    const bool every_lane_set = all (*this);
    return every_lane_set;
}
2018-03-20 13:30:05 +11:00
2018-03-22 16:10:06 +11:00
///////////////////////////////////////////////////////////////////////////
2018-03-23 17:52:08 +11:00
// Stream insertion for a vector. Only the declaration lives here; no
// definition appears in this header.
template <size_t S, typename T>
std::ostream&
operator<< (std::ostream &os, simd<S,T> val);
2018-03-16 11:10:44 +11:00
}
#endif