fastmath: Increase precision of 2^x, needed for LC3 HR Precision tests

author: Antoine Soulier <asoulier@google.com> 2024-02-27 14:28:28 -0800
committer: Antoine Soulier <asoulier@google.com> 2024-02-27 14:28:28 -0800
commit: a01c060807c3997c8cdd14aebb75ea84473aab27 (patch)
tree: 91e6a085a5f90a38191053fb72b1727bd943b587
parent: e67bb2d07d9a562728cab4f6b561822f8c2e4571 (diff)
download: liblc3-a01c060807c3997c8cdd14aebb75ea84473aab27.tar.gz
2 files changed, 53 insertions, 26 deletions
diff --git a/src/fastmath.h b/src/fastmath.h
index a44e2e8..ac45e61 100644
--- a/src/fastmath.h
+++ b/src/fastmath.h
@@ -89,33 +89,52 @@ static inline float lc3_frexpf(float _x, int *exp) {
 
 /**
  * Fast 2^n approximation
- * x               Operand, range -8 to 8
- * return          2^x approximation (max relative error ~ 7e-6)
+ * x               Operand, range -100 to 100
+ * return          2^x approximation (max relative error ~ 4e-7)
  */
 static inline float lc3_exp2f(float x)
 {
-    float y;
+    /* --- 2^(i/8) for i from 0 to 7 --- */
 
-    /* --- Polynomial approx in range -0.5 to 0.5 --- */
+    static const float e[] = {
+        1.00000000e+00, 1.09050773e+00, 1.18920712e+00, 1.29683955e+00,
+        1.41421356e+00, 1.54221083e+00, 1.68179283e+00, 1.83400809e+00 };
 
-    static const float c[] = { 1.27191277e-09, 1.47415221e-07,
-        1.35510312e-05, 9.38375815e-04, 4.33216946e-02 };
+    /* --- Polynomial approx in range 0 to 1/8 --- */
 
-    y = (    c[0]) * x;
-    y = (y + c[1]) * x;
-    y = (y + c[2]) * x;
-    y = (y + c[3]) * x;
-    y = (y + c[4]) * x;
-    y = (y + 1.f);
+    static const float p[] = {
+        1.00448128e-02, 5.54563260e-02, 2.40228756e-01, 6.93147140e-01 };
+
+    /* --- Split the operand ---
+     *
+     * Such as x = k/8 + y, with k an integer, and |y| < 0.5/8
+     *
+     * Note that `fast-math` compiler option leads to rounding error,
+     * disable optimisation with `volatile`. */
+
+    volatile union { float f; int32_t s; } v;
+
+    v.f = x + 0x1.8p20f;
+    int k = v.s;
+    x -= v.f - 0x1.8p20f;
+
+    /* --- Compute 2^x, with |x| < 1 ---
+     * Perform polynomial approximation in range -0.5/8 to 0.5/8,
+     * and muplity by precomputed value of 2^(i/8), i in [0:7] */
+
+    union { float f; int32_t s; } y;
+
+    y.f = (      p[0]) * x;
+    y.f = (y.f + p[1]) * x;
+    y.f = (y.f + p[2]) * x;
+    y.f = (y.f + p[3]) * x;
+    y.f = (y.f +  1.f) * e[k & 7];
 
-    /* --- Raise to the power of 16  --- */
+    /* --- Add the exponent --- */
 
-    y = y*y;
-    y = y*y;
-    y = y*y;
-    y = y*y;
+    y.s += (k >> 3) << LC3_IEEE754_EXP_SHL;
 
-    return y;
+    return y.f;
 }
 
 /**
diff --git a/tables/fastmath.py b/tables/fastmath.py
index 202561a..0212e06 100755
--- a/tables/fastmath.py
+++ b/tables/fastmath.py
@@ -19,25 +19,33 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 
-def fast_exp2(x, p):
+def fast_exp2(x, t, p):
 
     p = p.astype(np.float32)
     x = x.astype(np.float32)
 
-    y = (((((p[0]*x) + p[1])*x + p[2])*x + p[3])*x + p[4])*x + 1
+    m = ((x + 0.5/8) % (1/8)) - (0.5/8)
+    e = int((x - m) * 8)
 
-    return np.power(y.astype(np.float32), 16)
+    y = ((((p[0]*m) + p[1])*m + p[2])*m + p[3])*m + p[4]
+    y = y * 2**(e // 8) * t[e % 8]
+
+    return y.astype(np.float32)
 
 def approx_exp2():
 
-    x = np.arange(-8, 8, step=1e-3)
+    x = np.arange(0, 1/8, step=1e-6)
+    p = np.polyfit(x, 2 ** x, 4)
+    t = [ 2**(i/8) for i in range(8) ]
+
+    x =  np.arange(-10, 10, step=1e-3)
+    y = [ fast_exp2(x[i], t, p) for i in range(len(x)) ]
 
-    p = np.polyfit(x, ((2 ** (x/16)) - 1) / x, 4)
-    y = [ fast_exp2(x[i], p) for i in range(len(x)) ]
     e = np.abs(y - 2**x) / (2 ** x)
 
-    print('{{ {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e} }}'
-                .format(p[0], p[1], p[2], p[3], p[4]))
+    print('{{ {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e}, \n'
+           '  {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e}, '.format(*t))
+    print('{{ {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e}, {:14.8e} }}'.format(*p))
     print('Max relative error: ', np.max(e))
     print('Max RMS error: ', np.sqrt(np.mean(e ** 2)))
author	Antoine Soulier <asoulier@google.com>	2024-02-27 14:28:28 -0800
committer	Antoine Soulier <asoulier@google.com>	2024-02-27 14:28:28 -0800
commit	a01c060807c3997c8cdd14aebb75ea84473aab27 (patch)
tree	91e6a085a5f90a38191053fb72b1727bd943b587
parent	e67bb2d07d9a562728cab4f6b561822f8c2e4571 (diff)
download	liblc3-a01c060807c3997c8cdd14aebb75ea84473aab27.tar.gz