/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2018 Danny Robson */ #pragma once #include "../maths.hpp" #include "../debug/assert.hpp" namespace cruft::maths::fast { // computes: a + t * b + t^2 * c + t^3 * d, but hopefully a little more // efficiently than typing out the above directly. constexpr float estrin (float t, float a, float b, float c, float d) { const auto t2 = t * t; return std::fma (b, t, a) + std::fma (d, t, c) * t2; } // approximates sin over the range [-2pi, 2pi] using a polynomial // approximation over [0; pi/4] // // the relative error should be on the order of 1e-6. // the absolute error should be maximally around 1e-5 // // sollya: // display=hexadecimal; // canonical=on!; // P = fpminimax(sin(x), [|3,5,7|], [|single...|],[0x1p-126;pi/4], , relative, x); // P; // dirtyinfnorm(sin(x)-P, [0;pi/4]); // plot(P(x)-sin(x),[0x1p-126;pi/4]); // // using minimax(sin(x)-x, [|3,5,7,9|], ...) has higher accuracy, but // probably not enough to offset the extra multiplications. constexpr float sin (float x) noexcept { CHECK_LE (x, 2 * pi); CHECK_GE (x, -2 * pi); // Push values into the range [0, 2pi] if (x < 0) x += 2 * pi; // Values in the range [pi, 2pi] are the negation of [0, pi] float neg = x > pi ? -1 : 1; if (x > pi) x -= pi; // Values in [0,pi] are symmetric about pi/2 if (x > pi / 2) x = pi - x; // 1,3,5,7: 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f; // 1,3,5,7,9: 1, -0x1.555556p-3f, 0x1.11115cp-7f, -0x1.a0403ap-13f, 0x1.75dc26p-19f; // sin(x) = x + Ax3 + Bx5 + Cx7 // sin(x) = x (1 + Ax2 + Bx4 + Cx6) // sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2 return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f); } // calculates an approximation of e^x // // we split the supplied value into integer and fractional components as // use the identity: e^(a+b) == e^a * e^b; // // integer powers can be computed efficiently using: // e^x // = 2^(x/ln2) // = 2^kx, where k = 1/ln2 // we set the floating point exponent directly with kx // // the fractional component is approximated by a polynomial across [0;1] // // we force the first two terms as 1 and x because they are fantastically // efficient to load in this context. // // using an order 5 polynomial is overkill, but we use something with 4 // coefficients because it should be pretty efficient to evaluate using // SIMD. // // sollya: // display=hexadecimal; // canonical=on!; // P = fpminimax(exp(x), [|2,3,4,5|], [|single...|], [0x1p-126;1], 1+x); // P; // dirtyinfnorm(P(x)-exp(x), [0;1]); // plot(P(x)-exp(x),[0; 1]); constexpr float exp (float x) { union { int32_t i; float f; } whole; whole.i = static_cast (x); float frac = x - whole.i; whole.i *= 0b00000000101110001010101000111011; //whole.i *= static_cast ((1<<(23)) / std::log(2)); whole.i += 0b00111111100000000000000000000000; //127 << 23; frac = 1 + frac + frac * frac * estrin (frac, 0.4997530281543731689453125f, 0.16853784024715423583984375, 3.6950431764125823974609375e-2, 1.303750835359096527099609375e-2 ); return frac * whole.f; } }