From a408a3b2c9ee9cacf735095fa4169621b2887e61 Mon Sep 17 00:00:00 2001 From: Abitofevrything Date: Fri, 6 Jan 2023 00:45:37 +0100 Subject: [PATCH] Drop Imath; Add lookup table for f16 -> f32 conversions --- Makefile | 4 --- ggml.c | 71 ++++++++++++++++++++--------------------------------- ggml.h | 1 - whisper.cpp | 1 - 4 files changed, 26 insertions(+), 51 deletions(-) diff --git a/Makefile b/Makefile index 5b3c235..547102d 100644 --- a/Makefile +++ b/Makefile @@ -133,10 +133,6 @@ ifdef WHISPER_GPROF CFLAGS += -pg CXXFLAGS += -pg endif -ifdef WHISPER_IMATH - CFLAGS += -DGGML_USE_IMATH - LDFLAGS += -lImath -endif ifneq ($(filter aarch64%,$(UNAME_M)),) endif ifneq ($(filter armv6%,$(UNAME_M)),) diff --git a/ggml.c b/ggml.c index 794b947..cef6e31 100644 --- a/ggml.c +++ b/ggml.c @@ -122,14 +122,6 @@ typedef double ggml_float; // #include -float ggml_fp16_to_fp32(ggml_fp16_t x) { - return x; -} - -ggml_fp16_t ggml_fp32_to_fp16(float x) { - return x; -} - #define GGML_FP16_TO_FP32(x) (x) #define GGML_FP32_TO_FP16(x) (x) @@ -148,30 +140,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) { #endif #ifdef __F16C__ -float ggml_fp16_to_fp32(ggml_fp16_t h) { - return _cvtsh_ss(h); -} -ggml_fp16_t ggml_fp32_to_fp16(float f) { - return _cvtss_sh(f, 0); -} - -#define GGML_FP16_TO_FP32(x) _cvtsh_ss(x) -#define GGML_FP32_TO_FP16(x) _cvtss_sh(x, 0) - -#elif GGML_USE_IMATH - -#include - -float ggml_fp16_to_fp32(ggml_fp16_t h) { - return imath_half_to_float(h); -} - -ggml_fp16_t ggml_fp32_to_fp16(float f) { - return imath_float_to_half(f); -} -#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x) -#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x) +#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0) #else @@ -196,7 +167,7 @@ static inline uint32_t fp32_to_bits(float f) { return fp32.as_bits; } -float ggml_fp16_to_fp32(ggml_fp16_t h) { +float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { const uint32_t w = (uint32_t) h << 16; const uint32_t sign = w & UINT32_C(0x80000000); const uint32_t two_w = w + w; @@ -219,7 +190,7 @@ float ggml_fp16_to_fp32(ggml_fp16_t h) { return fp32_from_bits(result); } -ggml_fp16_t ggml_fp32_to_fp16(float f) { +ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) const float scale_to_inf = 0x1.0p+112f; const float scale_to_zero = 0x1.0p-110f; @@ -245,8 +216,8 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) { return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); } -#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x) -#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x) +#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) #endif // __F16C__ @@ -262,6 +233,24 @@ static ggml_fp16_t table_gelu_f16[1 << 16]; // precomputed exp table for f16 (128 KB) static ggml_fp16_t table_exp_f16[1 << 16]; +// precomputed f32 table for f16 (256 KB) +static float table_f32_f16[1 << 16]; + +// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32, +// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON. +#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16) + +float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { + uint16_t s; + memcpy(&s, &f, sizeof(uint16_t)); + return table_f32_f16[s]; +} + +#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x) +#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) + +#endif + // // timing // @@ -1496,7 +1485,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { static bool is_first_call = true; if (is_first_call) { - // initialize GELU and EXP tables + // initialize GELU, EXP and F32 tables { const uint64_t t_start = ggml_time_us(); UNUSED(t_start); @@ -1504,7 +1493,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { for (int i = 0; i < (1 << 16); ++i) { uint16_t ui = i; memcpy(&ii, &ui, sizeof(ii)); - const float f = GGML_FP16_TO_FP32(ii); + const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii); table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); table_exp_f16[i] = GGML_FP32_TO_FP16(exp(f)); } @@ -8467,12 +8456,4 @@ int ggml_cpu_has_sse3(void) { #endif } -int ggml_cpu_has_imath(void) { -#if defined(GGML_USE_IMATH) - return 1; -#else - return 0; -#endif -} - //////////////////////////////////////////////////////////////////////////////// diff --git a/ggml.h b/ggml.h index 1251ef7..3c62d38 100644 --- a/ggml.h +++ b/ggml.h @@ -732,7 +732,6 @@ int ggml_cpu_has_fp16_va(void); int ggml_cpu_has_wasm_simd(void); int ggml_cpu_has_blas(void); int ggml_cpu_has_sse3(void); -int ggml_cpu_has_imath(void); #ifdef __cplusplus } diff --git a/whisper.cpp b/whisper.cpp index dfc72f9..0ffa74e 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2582,7 +2582,6 @@ const char * whisper_print_system_info(void) { s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; - s += "IMATH = " + std::to_string(ggml_cpu_has_imath()) + " | "; return s.c_str(); }