From a408a3b2c9ee9cacf735095fa4169621b2887e61 Mon Sep 17 00:00:00 2001
From: Abitofevrything <mylo.fawcett@gmail.com>
Date: Fri, 6 Jan 2023 00:45:37 +0100
Subject: [PATCH] Drop Imath; Add lookup table for f16 -> f32 conversions

---
 Makefile    |  4 ---
 ggml.c      | 71 ++++++++++++++++++++---------------------------------
 ggml.h      |  1 -
 whisper.cpp |  1 -
 4 files changed, 26 insertions(+), 51 deletions(-)

diff --git a/Makefile b/Makefile
index 5b3c235..547102d 100644
--- a/Makefile
+++ b/Makefile
@@ -133,10 +133,6 @@ ifdef WHISPER_GPROF
 	CFLAGS  += -pg
 	CXXFLAGS  += -pg
 endif
-ifdef WHISPER_IMATH
-	CFLAGS += -DGGML_USE_IMATH
-	LDFLAGS += -lImath
-endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 endif
 ifneq ($(filter armv6%,$(UNAME_M)),)
diff --git a/ggml.c b/ggml.c
index 794b947..cef6e31 100644
--- a/ggml.c
+++ b/ggml.c
@@ -122,14 +122,6 @@ typedef double ggml_float;
 //
 #include <arm_neon.h>
 
-float ggml_fp16_to_fp32(ggml_fp16_t x) {
-    return x;
-}
-
-ggml_fp16_t ggml_fp32_to_fp16(float x) {
-    return x;
-}
-
 #define GGML_FP16_TO_FP32(x) (x)
 #define GGML_FP32_TO_FP16(x) (x)
 
@@ -148,30 +140,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
 #endif
 
 #ifdef __F16C__
-float ggml_fp16_to_fp32(ggml_fp16_t h) {
-    return _cvtsh_ss(h);
-}
-ggml_fp16_t ggml_fp32_to_fp16(float f) {
-    return _cvtss_sh(f, 0);
-}
-
-#define GGML_FP16_TO_FP32(x) _cvtsh_ss(x)
-#define GGML_FP32_TO_FP16(x) _cvtss_sh(x, 0)
-
-#elif GGML_USE_IMATH
-
-#include <Imath/half.h>
-
-float ggml_fp16_to_fp32(ggml_fp16_t h) {
-    return imath_half_to_float(h);
-}
-
-ggml_fp16_t ggml_fp32_to_fp16(float f) {
-    return imath_float_to_half(f);
-}
 
-#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
 
 #else
 
@@ -196,7 +167,7 @@ static inline uint32_t fp32_to_bits(float f) {
 	return fp32.as_bits;
 }
 
-float ggml_fp16_to_fp32(ggml_fp16_t h) {
+float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
     const uint32_t w = (uint32_t) h << 16;
     const uint32_t sign = w & UINT32_C(0x80000000);
     const uint32_t two_w = w + w;
@@ -219,7 +190,7 @@ float ggml_fp16_to_fp32(ggml_fp16_t h) {
     return fp32_from_bits(result);
 }
 
-ggml_fp16_t ggml_fp32_to_fp16(float f) {
+ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
     const float scale_to_inf = 0x1.0p+112f;
     const float scale_to_zero = 0x1.0p-110f;
@@ -245,8 +216,8 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) {
     return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
 }
 
-#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
 
 #endif // __F16C__
 
@@ -262,6 +233,24 @@ static ggml_fp16_t table_gelu_f16[1 << 16];
 // precomputed exp table for f16 (128 KB)
 static ggml_fp16_t table_exp_f16[1 << 16];
 
+// precomputed f32 table for f16 (256 KB)
+static float table_f32_f16[1 << 16];
+
+// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
+// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
+#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
+
+float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
+    uint16_t s;
+    memcpy(&s, &f, sizeof(uint16_t));
+    return table_f32_f16[s];
+}
+
+#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
+
+#endif
+
 //
 // timing
 //
@@ -1496,7 +1485,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     static bool is_first_call = true;
 
     if (is_first_call) {
-        // initialize GELU and EXP tables
+        // initialize GELU, EXP and F32 tables
         {
             const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
 
@@ -1504,7 +1493,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
             for (int i = 0; i < (1 << 16); ++i) {
                 uint16_t ui = i;
                 memcpy(&ii, &ui, sizeof(ii));
-                const float f = GGML_FP16_TO_FP32(ii);
+                const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
                 table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
                 table_exp_f16[i]  = GGML_FP32_TO_FP16(exp(f));
             }
@@ -8467,12 +8456,4 @@ int ggml_cpu_has_sse3(void) {
 #endif
 }
 
-int ggml_cpu_has_imath(void) {
-#if defined(GGML_USE_IMATH)
-    return 1;
-#else
-    return 0;
-#endif 
-}
-
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/ggml.h b/ggml.h
index 1251ef7..3c62d38 100644
--- a/ggml.h
+++ b/ggml.h
@@ -732,7 +732,6 @@ int ggml_cpu_has_fp16_va(void);
 int ggml_cpu_has_wasm_simd(void);
 int ggml_cpu_has_blas(void);
 int ggml_cpu_has_sse3(void);
-int ggml_cpu_has_imath(void);
 
 #ifdef  __cplusplus
 }
diff --git a/whisper.cpp b/whisper.cpp
index dfc72f9..0ffa74e 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2582,7 +2582,6 @@ const char * whisper_print_system_info(void) {
     s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
     s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
     s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
-    s += "IMATH = "     + std::to_string(ggml_cpu_has_imath())     + " | ";
 
     return s.c_str();
 }