libcruft-util/maths/fast.hpp

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
    * Copyright 2018 Danny Robson <danny@nerdcruft.net>
 */

#ifndef CRUFT_UTIL_MATHS_FAST_HPP
#define CRUFT_UTIL_MATHS_FAST_HPP

#include "../maths.hpp"
#include "../debug.hpp"

namespace util::maths::fast {
    float
    estrin (float t, float a, float b, float c, float d)
    {
        const auto t2 = t * t;
        return std::fma (b, t, a) + std::fma (d, t, c) * t2;
    }

    // approximates sin over the range [0; pi/4] using a polynomial.
    //
    // the relative error should be on the order of 1e-6.
    //
    // sollya:
    //   display=hexadecimal;
    //   canonical=on!;
    //   P = fpminimax(sin(x), [|3,5,7|], [|single...|],[0x1p-126;pi/4], , relative, x);
    //   P;
    //   dirtyinfnorm(sin(x)-P, [0;pi/4]);
    //   plot(P(x)-sin(x),[0x1p-126;pi/4]);
    //
    // using minimax(sin(x)-x, [|3,5,7,9|], ...) has higher accuracy, but
    // probably not enough to offset the extra multiplications.
    float
    sin (float x) noexcept
    {
        CHECK_LT (x,  2 * pi<float>);
        CHECK_GT (x, -2 * pi<float>);

        // flip negative angles due to having an even function
        float neg = x < 0 ? x *= -1, -1 : 1;

        // reduce the angle due to the period
        int periods = static_cast<int> (x / 2 / pi<float>);
        x -= periods * 2 * pi<float>;

        // 1,3,5,7:      1, -0x1.555546p-3f,  0x1.11077ap-7f, -0x1.9954e8p-13f;
        // 1,3,5,7,9:    1, -0x1.555556p-3f,  0x1.11115cp-7f, -0x1.a0403ap-13f,  0x1.75dc26p-19f;

        // sin(x) =    x + Ax3 + Bx5 + Cx7
        // sin(x) = x (1 + Ax2 + Bx4 + Cx6)
        // sin(x) = x (1 + Ay1 + By2 + Cy3), y = x^2

        return neg * x * estrin (x * x, 1, -0x1.555546p-3f, 0x1.11077ap-7f, -0x1.9954e8p-13f);
    }

    // calculates an approximation of e^x
    //
    // we split the supplied value into integer and fractional components as
    // use the identity: e^(a+b) == e^a * e^b;
    //
    // integer powers can be computed efficiently using:
    //    e^x
    //  = 2^(x/ln2)
    //  = 2^kx, where k = 1/ln2
    // we set the floating point exponent directly with kx
    //
    // the fractional component is approximated by a polynomial across [0;1]
    //
    // we force the first two terms as 1 and x because they are fantastically
    // efficient to load in this context.
    //
    // using an order 5 polynomial is overkill, but we use something with 4
    // coefficients because it should be pretty efficient to evaluate using
    // SIMD.
    //
    // sollya:
    //   display=hexadecimal;
    //   canonical=on!;
    //   P = fpminimax(exp(x), [|2,3,4,5|], [|single...|], [0x1p-126;1], 1+x);
    //   P;
    //   dirtyinfnorm(P(x)-exp(x), [0;1]);
    //   plot(P(x)-exp(x),[0; 1]);
    float
    exp (float x)
    {
        union {
            int32_t i;
            float f;
        } whole;

        whole.i  = static_cast<int32_t> (x);
        float frac = x - whole.i;

        whole.i *= 0b00000000101110001010101000111011;
        //whole.i *= static_cast<int32_t> ((1<<(23)) / std::log(2));
        whole.i += 0b00111111100000000000000000000000; //127 << 23;

        frac = 1 + frac + frac * frac * estrin (frac,
            0.4997530281543731689453125f,
            0.16853784024715423583984375,
            3.6950431764125823974609375e-2,
            1.303750835359096527099609375e-2
        );

        return frac * whole.f;
    }
}

#endif