Drop Imath; Add lookup table for f16 -> f32 conversions

3 years ago · a408a3b2c9
parent d8f356ac9f
commit a408a3b2c9
4 changed files with 26 additions and 51 deletions
--- a/4
+++ b/4
@ -133,10 +133,6 @@ ifdef WHISPER_GPROF
 	CFLAGS  += -pg
 	CXXFLAGS  += -pg
 endif
 ifdef WHISPER_IMATH
 	CFLAGS += -DGGML_USE_IMATH
 	LDFLAGS += -lImath
 endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 endif
 ifneq ($(filter armv6%,$(UNAME_M)),)
--- a/ggml.c
+++ b/ggml.c
@ -122,14 +122,6 @@ typedef double ggml_float;
 //
 #include <arm_neon.h>
 // Widen an fp16 value to float by relying on the implicit conversion from
 // ggml_fp16_t to float; no helper routine or lookup is involved.
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
     const float widened = x;
     return widened;
 }
 // Narrow a float to ggml_fp16_t by relying on the implicit conversion
 // between the two types; no helper routine is involved.
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
     const ggml_fp16_t narrowed = x;
     return narrowed;
 }
 #define GGML_FP16_TO_FP32(x) (x)
 #define GGML_FP32_TO_FP16(x) (x)
@ -148,30 +140,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
 #endif
 #ifdef __F16C__
 // fp16 -> fp32 conversion for builds with __F16C__ defined: delegates to
 // the _cvtsh_ss intrinsic and returns its result unchanged.
 float ggml_fp16_to_fp32(ggml_fp16_t h) {
     const float widened = _cvtsh_ss(h);
     return widened;
 }
 // fp32 -> fp16 conversion for builds with __F16C__ defined: delegates to
 // the _cvtss_sh intrinsic, always passing 0 as its second argument.
 ggml_fp16_t ggml_fp32_to_fp16(float f) {
     const ggml_fp16_t narrowed = _cvtss_sh(f, 0);
     return narrowed;
 }
 #define GGML_FP16_TO_FP32(x) _cvtsh_ss(x)
 #define GGML_FP32_TO_FP16(x) _cvtss_sh(x, 0)
 #elif GGML_USE_IMATH
 #include <Imath/half.h>
 // fp16 -> fp32 conversion for GGML_USE_IMATH builds: forwards the value to
 // imath_half_to_float from <Imath/half.h> and returns the result.
 float ggml_fp16_to_fp32(ggml_fp16_t h) {
     const float widened = imath_half_to_float(h);
     return widened;
 }
 // fp32 -> fp16 conversion for GGML_USE_IMATH builds: forwards the value to
 // imath_float_to_half from <Imath/half.h> and returns the result.
 ggml_fp16_t ggml_fp32_to_fp16(float f) {
     const ggml_fp16_t narrowed = imath_float_to_half(f);
     return narrowed;
 }
-#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
-#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
 #else
@ -196,7 +167,7 @@ static inline uint32_t fp32_to_bits(float f) {
 	return fp32.as_bits;
 }
-float ggml_fp16_to_fp32(ggml_fp16_t h) {
+float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    const uint32_t w = (uint32_t) h << 16;
    const uint32_t sign = w & UINT32_C(0x80000000);
    const uint32_t two_w = w + w;
@ -219,7 +190,7 @@ float ggml_fp16_to_fp32(ggml_fp16_t h) {
    return fp32_from_bits(result);
 }
-ggml_fp16_t ggml_fp32_to_fp16(float f) {
+ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float scale_to_inf = 0x1.0p+112f;
    const float scale_to_zero = 0x1.0p-110f;
@ -245,8 +216,8 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) {
    return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
 }
-#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
 #endif // __F16C__
@ -262,6 +233,24 @@ static ggml_fp16_t table_gelu_f16[1 << 16];
 // precomputed exp table for f16 (128 KB)
 static ggml_fp16_t table_exp_f16[1 << 16];
 // precomputed f32 table for f16 (256 KB)
 static float table_f32_f16[1 << 16];
 // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
 #if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
 // Table-driven f16 -> f32 conversion: the 16 bits of f are reinterpreted as
 // an unsigned index and the matching entry of table_f32_f16 is returned.
 float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
     uint16_t index;
     // memcpy transfers the raw bit pattern rather than converting the value
     memcpy(&index, &f, sizeof index);
     return table_f32_f16[index];
 }
 #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
 #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 #endif
 //
 // timing
 //
@ -1496,7 +1485,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
    static bool is_first_call = true;
    if (is_first_call) {
-        // initialize GELU and EXP tables
+        // initialize GELU, EXP and F32 tables
        {
            const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
@ -1504,7 +1493,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
            for (int i = 0; i < (1 << 16); ++i) {
                uint16_t ui = i;
                memcpy(&ii, &ui, sizeof(ii));
-                const float f = GGML_FP16_TO_FP32(ii);
+                const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
                table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
                table_exp_f16[i]  = GGML_FP32_TO_FP16(exp(f));
            }
@ -8467,12 +8456,4 @@ int ggml_cpu_has_sse3(void) {
 #endif
 }
 // Reports whether this build was compiled with GGML_USE_IMATH defined:
 // returns 1 when it was, 0 otherwise.
 int ggml_cpu_has_imath(void) {
     int has_imath = 0;
 #ifdef GGML_USE_IMATH
     has_imath = 1;
 #endif
     return has_imath;
 }
 ////////////////////////////////////////////////////////////////////////////////
--- a/ggml.h
+++ b/ggml.h
@ -732,7 +732,6 @@ int ggml_cpu_has_fp16_va(void);
 int ggml_cpu_has_wasm_simd(void);
 int ggml_cpu_has_blas(void);
 int ggml_cpu_has_sse3(void);
 int ggml_cpu_has_imath(void);
 #ifdef  __cplusplus
 }
--- a/whisper.cpp
+++ b/whisper.cpp
@ -2582,7 +2582,6 @@ const char * whisper_print_system_info(void) {
    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
    s += "IMATH = "     + std::to_string(ggml_cpu_has_imath())     + " | ";
    return s.c_str();
 }