libcruft-util/cruft/util/maths/fast.hpp

/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
    * Copyright 2018 Danny Robson <danny@nerdcruft.net>
 */

#pragma once

#include "../maths.hpp"
#include "../debug/assert.hpp"

namespace cruft::maths::fast {
    // computes: a + t * b + t^2 * c + t^3 * d, but hopefully a little more
    // efficiently than typing out the above directly.
    constexpr float
    estrin (float t, float a, float b, float c, float d)
    {
        const auto t2 = t * t;
        return std::fma (b, t, a) + std::fma (d, t, c) * t2;
    }

    // approximates sin over the range [-2pi, 2pi] using a polynomial
    // approximation over [0; pi/4]
    //
    // the relative error should be on the order of 1e-6.
    // the absolute error should be maximally around 1e-5
    //
    // sollya:
    //   display=hexadecimal;
    //   canonical=on!;
    //   P = fpminimax(sin(x), [|3,5,7|], [|single...|],[0x1p-126;pi/4], , relative, x);
    //   P;
    //   dirtyinfnorm(sin(x)-P, [0;pi/4]);
    //   plot(P(x)-sin(x),[0x1p-126;pi/4]);
    //
    // using minimax(sin(x)-x, [|3,5,7,9|], ...) has higher accuracy, but
    // probably not enough to offset the extra multiplications.
    constexpr float
    sin (float x) noexcept
    {
        CHECK_LE (x,  2 * pi<float>);
        CHECK_GE (x, -2 * pi<float>);

        // Push values into the range [0, 2pi]
        if (x < 0)
            x += 2 * pi<float>;

        // Values in the range [pi, 2pi] are the negation of [0, pi]
        float neg = x > pi<float> ? -1 : 1;
        if (x > pi<float>)
            x -= pi<float>;

        // Values in [0,pi] are symmetric about pi/2
        if (x > pi<float> / 2)
            x = pi<float> - x;

        // 1,3,5,7:      1, -0x1.555546p-3f,  0x1.11077ap-7f, -0x1.9954e8p-13f;
        // 1,3,5,7,9:    1, -0x1.555556p-3f,  0x1.11115cp-7f, -0x1.a0403ap-13f,  0x1.75dc26p-19f;

        // sin(x) =    x + Ax3 + Bx5 + Cx7
        // sin(x) = x (1 + Ax2 + Bx4 + Cx6)
        // sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2

        return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f);
    }

    // calculates an approximation of e^x
    //
    // we split the supplied value into integer and fractional components as
    // use the identity: e^(a+b) == e^a * e^b;
    //
    // integer powers can be computed efficiently using:
    //    e^x
    //  = 2^(x/ln2)
    //  = 2^kx, where k = 1/ln2
    // we set the floating point exponent directly with kx
    //
    // the fractional component is approximated by a polynomial across [0;1]
    //
    // we force the first two terms as 1 and x because they are fantastically
    // efficient to load in this context.
    //
    // using an order 5 polynomial is overkill, but we use something with 4
    // coefficients because it should be pretty efficient to evaluate using
    // SIMD.
    //
    // sollya:
    //   display=hexadecimal;
    //   canonical=on!;
    //   P = fpminimax(exp(x), [|2,3,4,5|], [|single...|], [0x1p-126;1], 1+x);
    //   P;
    //   dirtyinfnorm(P(x)-exp(x), [0;1]);
    //   plot(P(x)-exp(x),[0; 1]);
    constexpr float
    exp (float x)
    {
        union {
            int32_t i;
            float f;
        } whole;

        whole.i  = static_cast<int32_t> (x);
        float frac = x - whole.i;

        whole.i *= 0b00000000101110001010101000111011;
        //whole.i *= static_cast<int32_t> ((1<<(23)) / std::log(2));
        whole.i += 0b00111111100000000000000000000000; //127 << 23;

        frac = 1 + frac + frac * frac * estrin (frac,
            0.4997530281543731689453125f,
            0.16853784024715423583984375,
            3.6950431764125823974609375e-2,
            1.303750835359096527099609375e-2
        );

        return frac * whole.f;
    }
}
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`/*`
relicense under MPL-2.0 2018-08-04 15:14:06 +10:00			`* This Source Code Form is subject to the terms of the Mozilla Public`
			`* License, v. 2.0. If a copy of the MPL was not distributed with this`
			`* file, You can obtain one at http://mozilla.org/MPL/2.0/.`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`*`
			`* Copyright 2018 Danny Robson <danny@nerdcruft.net>`
			`*/`

build: address clang-tidy errors 2021-04-20 12:10:53 +10:00			`#pragma once`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00
			`#include "../maths.hpp"`
debug: split debug headers into components 2019-05-17 12:26:08 +10:00			`#include "../debug/assert.hpp"`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00
rename root namespace from util to cruft This places, at long last, the core library code into the same namespace as the extended library code. 2018-08-05 14:42:02 +10:00			`namespace cruft::maths::fast {`
maths/fast: add minimal explanation of `estrin` function 2021-05-12 15:29:12 +10:00			`// computes: a + t * b + t^2 * c + t^3 * d, but hopefully a little more`
			`// efficiently than typing out the above directly.`
build: address clang-tidy errors 2021-04-20 12:10:53 +10:00			`constexpr float`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`estrin (float t, float a, float b, float c, float d)`
			`{`
			`const auto t2 = t * t;`
			`return std::fma (b, t, a) + std::fma (d, t, c) * t2;`
			`}`

maths/fast: actually account for symmetries in `sin` 2021-05-13 17:01:30 +10:00			`// approximates sin over the range [-2pi, 2pi] using a polynomial`
			`// approximation over [0; pi/4]`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`//`
			`// the relative error should be on the order of 1e-6.`
maths/fast: actually account for symmetries in `sin` 2021-05-13 17:01:30 +10:00			`// the absolute error should be maximally around 1e-5`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`//`
			`// sollya:`
			`// display=hexadecimal;`
			`// canonical=on!;`
			`// P = fpminimax(sin(x), [\|3,5,7\|], [\|single...\|],[0x1p-126;pi/4], , relative, x);`
			`// P;`
			`// dirtyinfnorm(sin(x)-P, [0;pi/4]);`
			`// plot(P(x)-sin(x),[0x1p-126;pi/4]);`
			`//`
			`// using minimax(sin(x)-x, [\|3,5,7,9\|], ...) has higher accuracy, but`
			`// probably not enough to offset the extra multiplications.`
build: address clang-tidy errors 2021-04-20 12:10:53 +10:00			`constexpr float`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`sin (float x) noexcept`
			`{`
maths/fast: actually account for symmetries in `sin` 2021-05-13 17:01:30 +10:00			`CHECK_LE (x, 2 * pi<float>);`
			`CHECK_GE (x, -2 * pi<float>);`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00
maths/fast: actually account for symmetries in `sin` 2021-05-13 17:01:30 +10:00			`// Push values into the range [0, 2pi]`
			`if (x < 0)`
			`x += 2 * pi<float>;`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00
maths/fast: actually account for symmetries in `sin` 2021-05-13 17:01:30 +10:00			`// Values in the range [pi, 2pi] are the negation of [0, pi]`
			`float neg = x > pi<float> ? -1 : 1;`
			`if (x > pi<float>)`
			`x -= pi<float>;`

			`// Values in [0,pi] are symmetric about pi/2`
			`if (x > pi<float> / 2)`
			`x = pi<float> - x;`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00
			`// 1,3,5,7: 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f;`
			`// 1,3,5,7,9: 1, -0x1.555556p-3f, 0x1.11115cp-7f, -0x1.a0403ap-13f, 0x1.75dc26p-19f;`

			`// sin(x) = x + Ax3 + Bx5 + Cx7`
			`// sin(x) = x (1 + Ax2 + Bx4 + Cx6)`
			`// sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2`

			`return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f);`
			`}`

			`// calculates an approximation of e^x`
			`//`
			`// we split the supplied value into integer and fractional components as`
			`// use the identity: e^(a+b) == e^a * e^b;`
			`//`
			`// integer powers can be computed efficiently using:`
			`// e^x`
			`// = 2^(x/ln2)`
			`// = 2^kx, where k = 1/ln2`
			`// we set the floating point exponent directly with kx`
			`//`
			`// the fractional component is approximated by a polynomial across [0;1]`
			`//`
			`// we force the first two terms as 1 and x because they are fantastically`
			`// efficient to load in this context.`
			`//`
			`// using an order 5 polynomial is overkill, but we use something with 4`
			`// coefficients because it should be pretty efficient to evaluate using`
			`// SIMD.`
			`//`
			`// sollya:`
			`// display=hexadecimal;`
			`// canonical=on!;`
			`// P = fpminimax(exp(x), [\|2,3,4,5\|], [\|single...\|], [0x1p-126;1], 1+x);`
			`// P;`
			`// dirtyinfnorm(P(x)-exp(x), [0;1]);`
			`// plot(P(x)-exp(x),[0; 1]);`
build: address clang-tidy errors 2021-04-20 12:10:53 +10:00			`constexpr float`
maths: add fast approximations WIP 2018-03-20 13:33:07 +11:00			`exp (float x)`
			`{`
			`union {`
			`int32_t i;`
			`float f;`
			`} whole;`

			`whole.i = static_cast<int32_t> (x);`
			`float frac = x - whole.i;`

			`whole.i *= 0b00000000101110001010101000111011;`
			`//whole.i *= static_cast<int32_t> ((1<<(23)) / std::log(2));`
			`whole.i += 0b00111111100000000000000000000000; //127 << 23;`

			`frac = 1 + frac + frac * frac * estrin (frac,`
			`0.4997530281543731689453125f,`
			`0.16853784024715423583984375,`
			`3.6950431764125823974609375e-2,`
			`1.303750835359096527099609375e-2`
			`);`

			`return frac * whole.f;`
			`}`
build: address clang-tidy errors 2021-04-20 12:10:53 +10:00			`}`