From eb155d5bb0f1132a0fa21e95e0634f175902b789 Mon Sep 17 00:00:00 2001
From: Danny Robson <danny@nerdcruft.net>
Date: Wed, 29 Jul 2015 16:11:48 +1000
Subject: [PATCH] m/fast: add some fast maths approximations

---
 Makefile.am         |   3 ++
 maths/fast.hpp      |  44 ++++++++++++++++++
 maths/fast.ipp      | 107 ++++++++++++++++++++++++++++++++++++++++++++
 test/maths_fast.cpp |  39 ++++++++++++++++
 4 files changed, 193 insertions(+)
 create mode 100644 maths/fast.hpp
 create mode 100644 maths/fast.ipp
 create mode 100644 test/maths_fast.cpp

diff --git a/Makefile.am b/Makefile.am
index 7e30cafc..08d080ed 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -127,6 +127,8 @@ UTIL_FILES   =			\
     maths.cpp			\
     maths.hpp			\
     maths.ipp			\
+    maths/fast.hpp		\
+    maths/fast.ipp		\
     matrix.cpp			\
     matrix.hpp			\
     matrix.ipp			\
@@ -335,6 +337,7 @@ TEST_BIN = 			\
     test/json_types		\
     test/ray 			\
     test/maths			\
+    test/maths_fast		\
     test/matrix			\
     test/md2			\
     test/md4			\
diff --git a/maths/fast.hpp b/maths/fast.hpp
new file mode 100644
index 00000000..9dfe7f50
--- /dev/null
+++ b/maths/fast.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson <danny@nerdcruft.net>
+ */
+
+#ifndef __UTIL_MATHS_FAST_HPP
+#define __UTIL_MATHS_FAST_HPP
+
+namespace util { namespace maths { namespace fast {
+    ///////////////////////////////////////////////////////////////////////////
+    constexpr float pow (float, float);
+
+    constexpr float pow2 (float);
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    constexpr float exp (float);
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    constexpr float log (float);
+
+    constexpr float log2 (float);
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    constexpr float sqrt (float);
+    constexpr float invsqrt (float);
+} } }
+
+#include "fast.ipp"
+
+#endif
diff --git a/maths/fast.ipp b/maths/fast.ipp
new file mode 100644
index 00000000..a63b10c5
--- /dev/null
+++ b/maths/fast.ipp
@@ -0,0 +1,107 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson <danny@nerdcruft.net>
+ */
+
+#ifdef __UTIL_MATHS_FAST_IPP
+#error
+#endif
+#define __UTIL_MATHS_FAST_IPP
+    
+
+namespace util { namespace maths { namespace fast {
+    ///////////////////////////////////////////////////////////////////////////
+    constexpr float
+    pow2 (float p)
+    {
+        float offset = (p < 0) ? 1.0f : 0.0f;
+        float clipp = (p < -126) ? -126.0f : p;
+        int32_t w = static_cast<int32_t> (clipp);
+        float z = clipp - w + offset;
+        union { uint32_t i; float f; } v = { static_cast<uint32_t> ( (1 << 23) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z) ) };
+
+        return v.f;
+
+        //float clipp = (p < -126) ? -126.0f : p;
+        //union { uint32_t i; float f; } v = { static_cast<uint32_t> ( (1 << 23) * (clipp + 126.94269504f) ) };
+        //return v.f;
+    }
+
+
+    constexpr float
+    log2 (float x)
+    {
+        union { float f; uint32_t i; } vx = { x };
+        union { uint32_t i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 };
+        float y = vx.i;
+        y *= 1.1920928955078125e-7f;
+
+        return y - 124.22551499f
+            - 1.498030302f * mx.f 
+            - 1.72587999f / (0.3520887068f + mx.f);
+
+
+        //union { float f; uint32_t i; } vx = { x };
+        //float y = vx.i;
+        //y *= 1.1920928955078125e-7f;
+        //return y - 126.94269504f;
+    }
+
+
+    constexpr float
+    log (float x)
+    {
+        return 0.69314718f * log2 (x);
+    }
+
+
+    constexpr float
+    pow (float a, float b)
+    {
+        return pow2 (b * log2 (a));
+    }
+
+
+    constexpr float
+    sqrt (float x)
+    {
+        //return pow (x, 0.5f);
+        union { float f; int32_t i; } u  = { x };
+
+        int32_t v = u.i;
+        v  -= 1 << 23;
+        v >>= 1;
+        v  += 1 << 29;
+
+        u.i = v;
+        return u.f;
+    }
+
+
+    constexpr float
+    invsqrt (float x)
+    {
+        union
+        {
+            float x;
+            int32_t i;
+        } u = { x };
+
+        u.i = 0x5f3759df - (u.i >> 1);
+
+        // refine estimate. repeat as necessary.
+        u.x = u.x * (1.5f - x * 0.5f * u.x * u.x);
+        return u.x;
+    }
+} } }
diff --git a/test/maths_fast.cpp b/test/maths_fast.cpp
new file mode 100644
index 00000000..24f5b798
--- /dev/null
+++ b/test/maths_fast.cpp
@@ -0,0 +1,39 @@
+#include "tap.hpp"
+
+#include "maths/fast.hpp"
+
+
+constexpr float
+threshold (float a, float b)
+{
+    constexpr float PARTS = 100;
+    return std::abs (a + b) / 2 / PARTS;
+}
+
+
+int
+main (void)
+{
+    util::TAP::logger tap;
+
+
+    {
+        auto a = util::maths::fast::log2 (3.456f);
+        auto b = std::log2 (3.456f);
+        tap.expect_lt (std::abs (a - b), threshold (a, b), "fast log2");
+    }
+
+    {
+        auto a = util::maths::fast::pow2 (-100.f);
+        auto b = std::pow (2.f, -100.f);
+        tap.expect_lt (std::abs (a - b), threshold (a, b), "fast pow2");
+    }
+
+    {
+        auto a = util::maths::fast::pow (0.8f, 100.f);
+        auto b = std::pow (0.8f, 100.f);
+        tap.expect_lt (std::abs (a - b), threshold (a, b), "fast pow");
+    }
+
+    return tap.status ();
+}