/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2018 Danny Robson */ #ifndef __ARM_NEON__ #error #endif #include "arm_neon.h" namespace cruft::coord { struct alignas (16) simd { using value_type = float32x4_t; /////////////////////////////////////////////////////////////////////// simd (float a, float b, float c, float d): data (_mm_setr_ps (a, b, c, d)) { ; } //--------------------------------------------------------------------- simd (float v): data (_mm_set_ps1 (v)) { ; } //--------------------------------------------------------------------- simd (value_type _data): data (_data) { ; } //--------------------------------------------------------------------- operator value_type& () { return data; } operator const value_type& () const { return data; } explicit operator bool () const; float operator[] (int idx) const { return data[idx]; } /////////////////////////////////////////////////////////////////////// value_type data; }; /////////////////////////////////////////////////////////////////////////// simd operator* (simd a, simd b) { return vmulq_f32 (a, b); }; simd operator/ (simd a, simd b) { return vdivq_f32 (a, b); }; simd operator+ (simd a, simd b) { return vaddq_f32 (a, b); }; simd operator- (simd a, simd b) { return vsubq_f32 (a, b); }; simd operator< (simd a, simd b); simd operator<= (simd a, simd b); simd operator> (simd a, simd b); simd operator>= (simd a, simd b); simd operator== (simd a, simd b); simd select (simd mask, simd a, simd b); auto sum (simd val) { // reverse and add to self giving: 0123 + 3210 auto revq = vrev64q_f32 (val); auto pair = vaddq_f32 (val, revq); // reverse the upper and lower pairs given (2301 + 1023) auto shuf = vcombine_f32 ( vget_high_f32 (pair), vget_low_f32 (pair) ); // add both partial sums: (2301 + 1032) + (0123 + 3210) return vaddq_f32 (shuf, pair); } simd dot (simd a, simd b) { return sum (a * b); } } #endif