libcruft-util/coord/simd_neon.hpp
Danny Robson f6056153e3 rename root namespace from util to cruft
This places, at long last, the core library code into the same namespace
as the extended library code.
2018-08-05 14:42:02 +10:00

89 lines
2.4 KiB
C++

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
*/
#ifndef __ARM_NEON__
#error
#endif
#include "arm_neon.h"
namespace cruft::coord {
/// A four-lane single-precision vector stored in a NEON `float32x4_t`.
///
/// The wrapper converts implicitly to and from the native register type so
/// that instances can be handed straight to NEON intrinsics.
struct alignas (16) simd {
    using value_type = float32x4_t;

    ///////////////////////////////////////////////////////////////////////
    /// Construct from four individual lanes; `a` becomes lane 0 and `d`
    /// becomes lane 3.
    simd (float a, float b, float c, float d)
    {
        // NEON has no single intrinsic taking four scalars, so stage the
        // lanes in memory order and load them with one instruction.
        const float lanes[4] = { a, b, c, d };
        data = vld1q_f32 (lanes);
    }

    //---------------------------------------------------------------------
    /// Broadcast the scalar `v` into every lane.
    simd (float v):
        data (vdupq_n_f32 (v))
    { ; }

    //---------------------------------------------------------------------
    /// Wrap an existing native vector.
    simd (value_type _data):
        data (_data)
    { ; }

    //---------------------------------------------------------------------
    /// Access the underlying native vector.
    operator       value_type& ()       { return data; }
    operator const value_type& () const { return data; }

    /// Declared here only; the definition is not visible in this header.
    explicit operator bool () const;

    /// Read lane `idx` by subscripting the native vector directly.
    /// No bounds check is performed on `idx`.
    float operator[] (int idx) const { return data[idx]; }

    ///////////////////////////////////////////////////////////////////////
    value_type data;
};
///////////////////////////////////////////////////////////////////////////
/// Lane-wise product of `a` and `b`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline simd operator* (simd a, simd b) { return vmulq_f32 (a, b); }
/// Lane-wise quotient `a / b`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline simd operator/ (simd a, simd b)
{
#if defined(__aarch64__) || defined(_M_ARM64)
    // A true vector divide only exists in the A64 instruction set.
    return vdivq_f32 (a, b);
#else
    // 32-bit NEON has no divide instruction: start from the hardware
    // reciprocal estimate and sharpen it with two Newton-Raphson steps.
    // The result is an approximation rather than an exactly rounded quotient.
    const float32x4_t den = b;
    float32x4_t inv = vrecpeq_f32 (den);
    inv = vmulq_f32 (vrecpsq_f32 (den, inv), inv);
    inv = vmulq_f32 (vrecpsq_f32 (den, inv), inv);
    return vmulq_f32 (a, inv);
#endif
}
/// Lane-wise sum of `a` and `b`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline simd operator+ (simd a, simd b) { return vaddq_f32 (a, b); }
/// Lane-wise difference `a - b`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline simd operator- (simd a, simd b) { return vsubq_f32 (a, b); }
// The comparison operators and `select` are only declared here; their
// definitions are not visible in this part of the file.
// NOTE(review): each comparison returns a `simd` rather than a `bool` --
// presumably a per-lane mask meant to be passed as the first argument of
// `select`; confirm against the definitions.
simd operator< (simd a, simd b);
simd operator<= (simd a, simd b);
simd operator> (simd a, simd b);
simd operator>= (simd a, simd b);
simd operator== (simd a, simd b);
simd select (simd mask, simd a, simd b);
/// Horizontal add: returns a native vector in which every lane holds the
/// sum of the four lanes of `val`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline auto sum (simd val)
{
    // vrev64q swaps the two lanes inside each 64-bit half (0123 -> 1032),
    // so after the add both lanes of the low half hold v0+v1 and both
    // lanes of the high half hold v2+v3.
    auto revq = vrev64q_f32 (val);
    auto pair = vaddq_f32 (val, revq);

    // exchange the high and low halves: (v2+v3, v2+v3, v0+v1, v0+v1)
    auto shuf = vcombine_f32 (
        vget_high_f32 (pair),
        vget_low_f32  (pair)
    );

    // adding the two partial sums leaves the full total in every lane
    return vaddq_f32 (shuf, pair);
}
/// Dot product of `a` and `b`: the lane-wise product is reduced with
/// `sum`, and the result of that reduction is returned wrapped in a `simd`.
///
/// Marked `inline` because the definition lives in a header; without it
/// every including translation unit would emit a duplicate definition.
inline simd
dot (simd a, simd b)
{
    return sum (a * b);
}
}
// NOTE: a stray `#endif` used to sit here. No preprocessor conditional is
// still open at this point in the header, so it was unmatched.