maths: add fast approximations WIP
This commit is contained in:
parent
5d02171a6f
commit
8212048750
@ -312,6 +312,7 @@ list (
|
|||||||
log.hpp
|
log.hpp
|
||||||
maths.cpp
|
maths.cpp
|
||||||
maths.hpp
|
maths.hpp
|
||||||
|
maths/fast.hpp
|
||||||
matrix.cpp
|
matrix.cpp
|
||||||
matrix2.cpp
|
matrix2.cpp
|
||||||
matrix3.cpp
|
matrix3.cpp
|
||||||
@ -504,6 +505,7 @@ if (TESTS)
|
|||||||
json_types
|
json_types
|
||||||
json2/event
|
json2/event
|
||||||
maths
|
maths
|
||||||
|
maths/fast
|
||||||
matrix
|
matrix
|
||||||
memory/deleter
|
memory/deleter
|
||||||
parse
|
parse
|
||||||
|
121
maths/fast.hpp
Normal file
121
maths/fast.hpp
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CRUFT_UTIL_MATHS_FAST_HPP
|
||||||
|
#define CRUFT_UTIL_MATHS_FAST_HPP
|
||||||
|
|
||||||
|
#include "../maths.hpp"
|
||||||
|
#include "../debug.hpp"
|
||||||
|
|
||||||
|
namespace util::maths::fast {
|
||||||
|
float
|
||||||
|
estrin (float t, float a, float b, float c, float d)
|
||||||
|
{
|
||||||
|
const auto t2 = t * t;
|
||||||
|
return std::fma (b, t, a) + std::fma (d, t, c) * t2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// approximates sin over the range [0; pi/4] using a polynomial.
|
||||||
|
//
|
||||||
|
// the relative error should be on the order of 1e-6.
|
||||||
|
//
|
||||||
|
// sollya:
|
||||||
|
// display=hexadecimal;
|
||||||
|
// canonical=on!;
|
||||||
|
// P = fpminimax(sin(x), [|3,5,7|], [|single...|],[0x1p-126;pi/4], , relative, x);
|
||||||
|
// P;
|
||||||
|
// dirtyinfnorm(sin(x)-P, [0;pi/4]);
|
||||||
|
// plot(P(x)-sin(x),[0x1p-126;pi/4]);
|
||||||
|
//
|
||||||
|
// using minimax(sin(x)-x, [|3,5,7,9|], ...) has higher accuracy, but
|
||||||
|
// probably not enough to offset the extra multiplications.
|
||||||
|
float
|
||||||
|
sin (float x) noexcept
|
||||||
|
{
|
||||||
|
CHECK_LT (x, 2 * pi<float>);
|
||||||
|
CHECK_GT (x, -2 * pi<float>);
|
||||||
|
|
||||||
|
// flip negative angles due to having an even function
|
||||||
|
float neg = x < 0 ? x *= -1, -1 : 1;
|
||||||
|
|
||||||
|
// reduce the angle due to the period
|
||||||
|
int periods = static_cast<int> (x / 2 / pi<float>);
|
||||||
|
x -= periods * 2 * pi<float>;
|
||||||
|
|
||||||
|
// 1,3,5,7: 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f;
|
||||||
|
// 1,3,5,7,9: 1, -0x1.555556p-3f, 0x1.11115cp-7f, -0x1.a0403ap-13f, 0x1.75dc26p-19f;
|
||||||
|
|
||||||
|
// sin(x) = x + Ax3 + Bx5 + Cx7
|
||||||
|
// sin(x) = x (1 + Ax2 + Bx4 + Cx6)
|
||||||
|
// sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2
|
||||||
|
|
||||||
|
return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculates an approximation of e^x
|
||||||
|
//
|
||||||
|
// we split the supplied value into integer and fractional components as
|
||||||
|
// use the identity: e^(a+b) == e^a * e^b;
|
||||||
|
//
|
||||||
|
// integer powers can be computed efficiently using:
|
||||||
|
// e^x
|
||||||
|
// = 2^(x/ln2)
|
||||||
|
// = 2^kx, where k = 1/ln2
|
||||||
|
// we set the floating point exponent directly with kx
|
||||||
|
//
|
||||||
|
// the fractional component is approximated by a polynomial across [0;1]
|
||||||
|
//
|
||||||
|
// we force the first two terms as 1 and x because they are fantastically
|
||||||
|
// efficient to load in this context.
|
||||||
|
//
|
||||||
|
// using an order 5 polynomial is overkill, but we use something with 4
|
||||||
|
// coefficients because it should be pretty efficient to evaluate using
|
||||||
|
// SIMD.
|
||||||
|
//
|
||||||
|
// sollya:
|
||||||
|
// display=hexadecimal;
|
||||||
|
// canonical=on!;
|
||||||
|
// P = fpminimax(exp(x), [|2,3,4,5|], [|single...|], [0x1p-126;1], 1+x);
|
||||||
|
// P;
|
||||||
|
// dirtyinfnorm(P(x)-exp(x), [0;1]);
|
||||||
|
// plot(P(x)-exp(x),[0; 1]);
|
||||||
|
float
|
||||||
|
exp (float x)
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
int32_t i;
|
||||||
|
float f;
|
||||||
|
} whole;
|
||||||
|
|
||||||
|
whole.i = static_cast<int32_t> (x);
|
||||||
|
float frac = x - whole.i;
|
||||||
|
|
||||||
|
whole.i *= 0b00000000101110001010101000111011;
|
||||||
|
//whole.i *= static_cast<int32_t> ((1<<(23)) / std::log(2));
|
||||||
|
whole.i += 0b00111111100000000000000000000000; //127 << 23;
|
||||||
|
|
||||||
|
frac = 1 + frac + frac * frac * estrin (frac,
|
||||||
|
0.4997530281543731689453125f,
|
||||||
|
0.16853784024715423583984375,
|
||||||
|
3.6950431764125823974609375e-2,
|
||||||
|
1.303750835359096527099609375e-2
|
||||||
|
);
|
||||||
|
|
||||||
|
return frac * whole.f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
100
test/maths/fast.cpp
Normal file
100
test/maths/fast.cpp
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
#include "maths/fast.hpp"
|
||||||
|
#include "tap.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
struct {
|
||||||
|
unsigned sin = 0;
|
||||||
|
unsigned exp = 0;
|
||||||
|
} successes;
|
||||||
|
|
||||||
|
// 64+1 evenly spaced evaluations of sine in the interval [0,pi/4]
|
||||||
|
// calculated with higher than float64 precision using python's mpmath.
|
||||||
|
//
|
||||||
|
// >>> import mpmath
|
||||||
|
// >>> steps=64
|
||||||
|
// [mpmath.nstr(mpmath.sin(mpmath.pi/4*(x/steps)), 8) for x in range(steps+1)]
|
||||||
|
static constexpr float sins[] = {
|
||||||
|
0x0.000000p+00f, 0x1.921d1fp-07f, 0x1.92155fp-06f, 0x1.2d8657p-05f,
|
||||||
|
0x1.91f65fp-05f, 0x1.f656e7p-05f, 0x1.2d5209p-04f, 0x1.5f6d00p-04f,
|
||||||
|
0x1.917a6bp-04f, 0x1.c3785cp-04f, 0x1.f564e5p-04f, 0x1.139f0cp-03f,
|
||||||
|
0x1.2c8106p-03f, 0x1.45576bp-03f, 0x1.5e2144p-03f, 0x1.76dd9dp-03f,
|
||||||
|
0x1.8f8b83p-03f, 0x1.a82a02p-03f, 0x1.c0b826p-03f, 0x1.d934fep-03f,
|
||||||
|
0x1.f19f97p-03f, 0x1.04fb80p-02f, 0x1.111d26p-02f, 0x1.1d3443p-02f,
|
||||||
|
0x1.294062p-02f, 0x1.35410cp-02f, 0x1.4135c9p-02f, 0x1.4d1e24p-02f,
|
||||||
|
0x1.58f9a7p-02f, 0x1.64c7ddp-02f, 0x1.708853p-02f, 0x1.7c3a93p-02f,
|
||||||
|
0x1.87de2ap-02f, 0x1.9372a6p-02f, 0x1.9ef794p-02f, 0x1.aa6c82p-02f,
|
||||||
|
0x1.b5d100p-02f, 0x1.c1249dp-02f, 0x1.cc66e9p-02f, 0x1.d79775p-02f,
|
||||||
|
0x1.e2b5d3p-02f, 0x1.edc195p-02f, 0x1.f8ba4dp-02f, 0x1.01cfc8p-01f,
|
||||||
|
0x1.073879p-01f, 0x1.0c9704p-01f, 0x1.11eb35p-01f, 0x1.1734d6p-01f,
|
||||||
|
0x1.1c73b3p-01f, 0x1.21a799p-01f, 0x1.26d054p-01f, 0x1.2bedb2p-01f,
|
||||||
|
0x1.30ff7fp-01f, 0x1.36058bp-01f, 0x1.3affa2p-01f, 0x1.3fed95p-01f,
|
||||||
|
0x1.44cf32p-01f, 0x1.49a449p-01f, 0x1.4e6cabp-01f, 0x1.532829p-01f,
|
||||||
|
0x1.57d693p-01f, 0x1.5c77bbp-01f, 0x1.610b75p-01f, 0x1.659192p-01f,
|
||||||
|
0x1.6a09e6p-01f
|
||||||
|
};
|
||||||
|
|
||||||
|
// [hex(mpmath.exp(mpmath.log(2)/2/steps*x), 8) for x in range(steps+1)]
|
||||||
|
static constexpr float exps[] = {
|
||||||
|
0x1.000000p+00f, 0x1.0163dap+00f, 0x1.02c9a3p+00f, 0x1.04315ep+00f,
|
||||||
|
0x1.059b0dp+00f, 0x1.0706b2p+00f, 0x1.087451p+00f, 0x1.09e3ecp+00f,
|
||||||
|
0x1.0b5586p+00f, 0x1.0cc922p+00f, 0x1.0e3ec3p+00f, 0x1.0fb66ap+00f,
|
||||||
|
0x1.11301dp+00f, 0x1.12abdcp+00f, 0x1.1429aap+00f, 0x1.15a98cp+00f,
|
||||||
|
0x1.172b83p+00f, 0x1.18af93p+00f, 0x1.1a35bep+00f, 0x1.1bbe08p+00f,
|
||||||
|
0x1.1d4873p+00f, 0x1.1ed502p+00f, 0x1.2063b8p+00f, 0x1.21f499p+00f,
|
||||||
|
0x1.2387a6p+00f, 0x1.251ce4p+00f, 0x1.26b456p+00f, 0x1.284dfep+00f,
|
||||||
|
0x1.29e9dfp+00f, 0x1.2b87fdp+00f, 0x1.2d285ap+00f, 0x1.2ecafap+00f,
|
||||||
|
0x1.306fe0p+00f, 0x1.32170fp+00f, 0x1.33c08bp+00f, 0x1.356c55p+00f,
|
||||||
|
0x1.371a73p+00f, 0x1.38cae6p+00f, 0x1.3a7db3p+00f, 0x1.3c32dcp+00f,
|
||||||
|
0x1.3dea64p+00f, 0x1.3fa450p+00f, 0x1.4160a2p+00f, 0x1.431f5dp+00f,
|
||||||
|
0x1.44e086p+00f, 0x1.46a41ep+00f, 0x1.486a2bp+00f, 0x1.4a32afp+00f,
|
||||||
|
0x1.4bfdadp+00f, 0x1.4dcb29p+00f, 0x1.4f9b27p+00f, 0x1.516daap+00f,
|
||||||
|
0x1.5342b5p+00f, 0x1.551a4cp+00f, 0x1.56f473p+00f, 0x1.58d12dp+00f,
|
||||||
|
0x1.5ab07dp+00f, 0x1.5c9268p+00f, 0x1.5e76f1p+00f, 0x1.605e1bp+00f,
|
||||||
|
0x1.6247ebp+00f, 0x1.643463p+00f, 0x1.662388p+00f, 0x1.68155dp+00f,
|
||||||
|
0x1.6a09e6p+00f,
|
||||||
|
};
|
||||||
|
(void)exps;
|
||||||
|
|
||||||
|
util::TAP::logger tap;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < std::size (sins); ++i) {
|
||||||
|
const auto theta = util::pi<float> / 4 * i / (std::size (sins)-1);
|
||||||
|
const auto value = util::maths::fast::sin (theta);
|
||||||
|
|
||||||
|
const auto abserr = std::abs (sins[i] - value);
|
||||||
|
const auto relerr = 1 - value / sins[i];
|
||||||
|
|
||||||
|
successes.sin += (abserr < 1e-32f || std::abs (relerr) < 1e-6f) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < std::size (exps); ++i) {
|
||||||
|
static constexpr float half_log2 = 0.34657359027997265470861606072908828403775006718012f;
|
||||||
|
const auto exponent = half_log2 * i / (std::size (exps)-1);
|
||||||
|
const auto value = util::maths::fast::exp (exponent);
|
||||||
|
|
||||||
|
const auto abserr = std::abs (exps[i] - value);
|
||||||
|
const auto relerr = 1 - value / exps[i];
|
||||||
|
|
||||||
|
successes.exp += (abserr < 1e-32f || std::abs (relerr) < 1e-4f) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 64; ++i) {
|
||||||
|
const float expected = expf (i);
|
||||||
|
const float observed = util::maths::fast::exp (i);
|
||||||
|
|
||||||
|
const float relerr = 1 - observed / expected;
|
||||||
|
std::cout << observed << '\t' << expected << '\t' << relerr << '\n';
|
||||||
|
tap.expect_lt (relerr, 1e-4f, "relative exp(%!) error", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
tap.expect_eq (successes.sin, std::size (sins), "sin evaluation, %!/%!", successes.sin, std::size (sins));
|
||||||
|
tap.expect_eq (successes.exp, std::size (exps), "exp evaluation, %!/%!", successes.exp, std::size (exps));
|
||||||
|
|
||||||
|
return tap.status ();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user