/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2018 Danny Robson */ #ifndef __ARM_NEON__ #error #endif #include "arm_neon.h" namespace util::coord { struct alignas (16) simd { using value_type = float32x4_t; /////////////////////////////////////////////////////////////////////// simd (float a, float b, float c, float d): data (_mm_setr_ps (a, b, c, d)) { ; } //--------------------------------------------------------------------- simd (float v): data (_mm_set_ps1 (v)) { ; } //--------------------------------------------------------------------- simd (value_type _data): data (_data) { ; } //--------------------------------------------------------------------- operator value_type& () { return data; } operator const value_type& () const { return data; } explicit operator bool () const; float operator[] (int idx) const { return data[idx]; } /////////////////////////////////////////////////////////////////////// value_type data; }; /////////////////////////////////////////////////////////////////////////// simd operator* (simd a, simd b) { return vmulq_f32 (a, b); }; simd operator/ (simd a, simd b) { return vdivq_f32 (a, b); }; simd operator+ (simd a, simd b) { return vaddq_f32 (a, b); }; simd operator- (simd a, simd b) { return vsubq_f32 (a, b); }; simd operator< (simd a, simd b); simd operator<= (simd a, simd b); simd operator> (simd a, simd b); simd operator>= (simd a, simd b); simd operator== (simd a, simd b); simd select (simd mask, simd a, simd b); auto sum (simd val) { // reverse and add to self giving: 0123 + 3210 auto revq = vrev64q_f32 (val); auto pair = vaddq_f32 (val, revq); // reverse the upper and lower pairs given (2301 + 1023) auto shuf = vcombine_f32 ( vget_high_f32 (pair), vget_low_f32 (pair) ); // add both partial sums: (2301 + 1032) + (0123 + 3210) return vaddq_f32 (shuf, pair); } simd dot (simd a, simd b) { return sum (a * b); } } #endif