2018-03-20 13:33:07 +11:00
|
|
|
/*
|
2018-08-04 15:14:06 +10:00
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2018-03-20 13:33:07 +11:00
|
|
|
*
|
|
|
|
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef CRUFT_UTIL_MATHS_FAST_HPP
|
|
|
|
#define CRUFT_UTIL_MATHS_FAST_HPP
|
|
|
|
|
|
|
|
#include "../maths.hpp"
|
|
|
|
#include "../debug.hpp"
|
|
|
|
|
2018-08-05 14:42:02 +10:00
|
|
|
namespace cruft::maths::fast {
|
2018-03-20 13:33:07 +11:00
|
|
|
float
|
|
|
|
estrin (float t, float a, float b, float c, float d)
|
|
|
|
{
|
|
|
|
const auto t2 = t * t;
|
|
|
|
return std::fma (b, t, a) + std::fma (d, t, c) * t2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// approximates sin over the range [0; pi/4] using a polynomial.
|
|
|
|
//
|
|
|
|
// the relative error should be on the order of 1e-6.
|
|
|
|
//
|
|
|
|
// sollya:
|
|
|
|
// display=hexadecimal;
|
|
|
|
// canonical=on!;
|
|
|
|
// P = fpminimax(sin(x), [|3,5,7|], [|single...|],[0x1p-126;pi/4], , relative, x);
|
|
|
|
// P;
|
|
|
|
// dirtyinfnorm(sin(x)-P, [0;pi/4]);
|
|
|
|
// plot(P(x)-sin(x),[0x1p-126;pi/4]);
|
|
|
|
//
|
|
|
|
// using minimax(sin(x)-x, [|3,5,7,9|], ...) has higher accuracy, but
|
|
|
|
// probably not enough to offset the extra multiplications.
|
|
|
|
float
|
|
|
|
sin (float x) noexcept
|
|
|
|
{
|
|
|
|
CHECK_LT (x, 2 * pi<float>);
|
|
|
|
CHECK_GT (x, -2 * pi<float>);
|
|
|
|
|
|
|
|
// flip negative angles due to having an even function
|
|
|
|
float neg = x < 0 ? x *= -1, -1 : 1;
|
|
|
|
|
|
|
|
// reduce the angle due to the period
|
|
|
|
int periods = static_cast<int> (x / 2 / pi<float>);
|
|
|
|
x -= periods * 2 * pi<float>;
|
|
|
|
|
|
|
|
// 1,3,5,7: 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f;
|
|
|
|
// 1,3,5,7,9: 1, -0x1.555556p-3f, 0x1.11115cp-7f, -0x1.a0403ap-13f, 0x1.75dc26p-19f;
|
|
|
|
|
|
|
|
// sin(x) = x + Ax3 + Bx5 + Cx7
|
|
|
|
// sin(x) = x (1 + Ax2 + Bx4 + Cx6)
|
|
|
|
// sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2
|
|
|
|
|
|
|
|
return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f);
|
|
|
|
}
|
|
|
|
|
|
|
|
// calculates an approximation of e^x
|
|
|
|
//
|
|
|
|
// we split the supplied value into integer and fractional components as
|
|
|
|
// use the identity: e^(a+b) == e^a * e^b;
|
|
|
|
//
|
|
|
|
// integer powers can be computed efficiently using:
|
|
|
|
// e^x
|
|
|
|
// = 2^(x/ln2)
|
|
|
|
// = 2^kx, where k = 1/ln2
|
|
|
|
// we set the floating point exponent directly with kx
|
|
|
|
//
|
|
|
|
// the fractional component is approximated by a polynomial across [0;1]
|
|
|
|
//
|
|
|
|
// we force the first two terms as 1 and x because they are fantastically
|
|
|
|
// efficient to load in this context.
|
|
|
|
//
|
|
|
|
// using an order 5 polynomial is overkill, but we use something with 4
|
|
|
|
// coefficients because it should be pretty efficient to evaluate using
|
|
|
|
// SIMD.
|
|
|
|
//
|
|
|
|
// sollya:
|
|
|
|
// display=hexadecimal;
|
|
|
|
// canonical=on!;
|
|
|
|
// P = fpminimax(exp(x), [|2,3,4,5|], [|single...|], [0x1p-126;1], 1+x);
|
|
|
|
// P;
|
|
|
|
// dirtyinfnorm(P(x)-exp(x), [0;1]);
|
|
|
|
// plot(P(x)-exp(x),[0; 1]);
|
|
|
|
float
|
|
|
|
exp (float x)
|
|
|
|
{
|
|
|
|
union {
|
|
|
|
int32_t i;
|
|
|
|
float f;
|
|
|
|
} whole;
|
|
|
|
|
|
|
|
whole.i = static_cast<int32_t> (x);
|
|
|
|
float frac = x - whole.i;
|
|
|
|
|
|
|
|
whole.i *= 0b00000000101110001010101000111011;
|
|
|
|
//whole.i *= static_cast<int32_t> ((1<<(23)) / std::log(2));
|
|
|
|
whole.i += 0b00111111100000000000000000000000; //127 << 23;
|
|
|
|
|
|
|
|
frac = 1 + frac + frac * frac * estrin (frac,
|
|
|
|
0.4997530281543731689453125f,
|
|
|
|
0.16853784024715423583984375,
|
|
|
|
3.6950431764125823974609375e-2,
|
|
|
|
1.303750835359096527099609375e-2
|
|
|
|
);
|
|
|
|
|
|
|
|
return frac * whole.f;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|