Drop Imath; Add lookup table for f16 -> f32 conversions

pull/368/head
Abitofevrything 3 years ago
parent d8f356ac9f
commit a408a3b2c9

@ -133,10 +133,6 @@ ifdef WHISPER_GPROF
CFLAGS += -pg CFLAGS += -pg
CXXFLAGS += -pg CXXFLAGS += -pg
endif endif
# Optional Imath support, enabled by defining WHISPER_IMATH:
# passes -DGGML_USE_IMATH to the C compiler (CFLAGS only) and adds
# libImath to the link line.
ifdef WHISPER_IMATH
CFLAGS += -DGGML_USE_IMATH
LDFLAGS += -lImath
endif
ifneq ($(filter aarch64%,$(UNAME_M)),) ifneq ($(filter aarch64%,$(UNAME_M)),)
endif endif
ifneq ($(filter armv6%,$(UNAME_M)),) ifneq ($(filter armv6%,$(UNAME_M)),)

@ -122,14 +122,6 @@ typedef double ggml_float;
// //
#include <arm_neon.h> #include <arm_neon.h>
// f16 -> f32 for the variant that follows the <arm_neon.h> include.
// The conversion is left to the language: x is returned as-is and converted
// to the float return type implicitly.
// NOTE(review): presumably ggml_fp16_t is a native half-precision type in
// this configuration - confirm against its typedef.
float ggml_fp16_to_fp32(ggml_fp16_t x) {
return x;
}
// f32 -> f16 for the variant that follows the <arm_neon.h> include.
// x is returned as-is and converted to ggml_fp16_t implicitly by the
// return statement.
// NOTE(review): presumably ggml_fp16_t is a native half-precision type in
// this configuration - confirm against its typedef.
ggml_fp16_t ggml_fp32_to_fp16(float x) {
return x;
}
#define GGML_FP16_TO_FP32(x) (x) #define GGML_FP16_TO_FP32(x) (x)
#define GGML_FP32_TO_FP16(x) (x) #define GGML_FP32_TO_FP16(x) (x)
@ -148,30 +140,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
#endif #endif
#ifdef __F16C__ #ifdef __F16C__
// f16 -> f32 for builds where __F16C__ is defined: converts a single
// half-precision value with the _cvtsh_ss intrinsic.
float ggml_fp16_to_fp32(ggml_fp16_t h) {
return _cvtsh_ss(h);
}
// f32 -> f16 for builds where __F16C__ is defined: converts a single float
// with the _cvtss_sh intrinsic.
// The second argument is the rounding-control immediate; 0 selects
// round-to-nearest per the Intel intrinsics documentation.
ggml_fp16_t ggml_fp32_to_fp16(float f) {
return _cvtss_sh(f, 0);
}
#define GGML_FP16_TO_FP32(x) _cvtsh_ss(x)
#define GGML_FP32_TO_FP16(x) _cvtss_sh(x, 0)
#elif GGML_USE_IMATH
#include <Imath/half.h>
// f16 -> f32 for builds with GGML_USE_IMATH: forwards to
// imath_half_to_float from <Imath/half.h>.
float ggml_fp16_to_fp32(ggml_fp16_t h) {
return imath_half_to_float(h);
}
// f32 -> f16 for builds with GGML_USE_IMATH: forwards to
// imath_float_to_half from <Imath/half.h>.
ggml_fp16_t ggml_fp32_to_fp16(float f) {
return imath_float_to_half(f);
}
#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x) #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x) #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
#else #else
@ -196,7 +167,7 @@ static inline uint32_t fp32_to_bits(float f) {
return fp32.as_bits; return fp32.as_bits;
} }
float ggml_fp16_to_fp32(ggml_fp16_t h) { float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
const uint32_t w = (uint32_t) h << 16; const uint32_t w = (uint32_t) h << 16;
const uint32_t sign = w & UINT32_C(0x80000000); const uint32_t sign = w & UINT32_C(0x80000000);
const uint32_t two_w = w + w; const uint32_t two_w = w + w;
@ -219,7 +190,7 @@ float ggml_fp16_to_fp32(ggml_fp16_t h) {
return fp32_from_bits(result); return fp32_from_bits(result);
} }
ggml_fp16_t ggml_fp32_to_fp16(float f) { ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f; const float scale_to_inf = 0x1.0p+112f;
const float scale_to_zero = 0x1.0p-110f; const float scale_to_zero = 0x1.0p-110f;
@ -245,8 +216,8 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) {
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
} }
#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x) #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x) #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#endif // __F16C__ #endif // __F16C__
@ -262,6 +233,24 @@ static ggml_fp16_t table_gelu_f16[1 << 16];
// precomputed exp table for f16 (128 KB) // precomputed exp table for f16 (128 KB)
static ggml_fp16_t table_exp_f16[1 << 16]; static ggml_fp16_t table_exp_f16[1 << 16];
// precomputed f32 table for f16 (256 KB)
static float table_f32_f16[1 << 16];
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
// Fallback definitions, compiled only when an earlier branch has not already
// provided both GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16.
// NOTE(review): the guard is an OR, so if exactly one of the two macros were
// already defined this block would define it a second time - confirm that
// every earlier branch defines both or neither.
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
// Converts f16 -> f32 by table lookup instead of computing the conversion.
// The raw 16 bits of f are copied into a uint16_t with memcpy, and that
// integer indexes table_f32_f16, which has one entry per 16-bit pattern
// (1 << 16), so the index is always in range.
// table_f32_f16 is a static array: every entry reads as 0.0f until it is
// filled. The ggml_init() hunk in this change fills it on its first call.
// NOTE(review): lookups made before that first ggml_init() call would return
// 0.0f - confirm no caller converts f16 values that early.
float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
uint16_t s;
memcpy(&s, &f, sizeof(uint16_t));
return table_f32_f16[s];
}
// f16 -> f32 goes through the lookup table; f32 -> f16 is still computed
// via GGML_COMPUTE_FP32_TO_FP16, which the preceding F16C / generic branches define.
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif
// //
// timing // timing
// //
@ -1496,7 +1485,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
static bool is_first_call = true; static bool is_first_call = true;
if (is_first_call) { if (is_first_call) {
// initialize GELU and EXP tables // initialize GELU, EXP and F32 tables
{ {
const uint64_t t_start = ggml_time_us(); UNUSED(t_start); const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
@ -1504,7 +1493,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
for (int i = 0; i < (1 << 16); ++i) { for (int i = 0; i < (1 << 16); ++i) {
uint16_t ui = i; uint16_t ui = i;
memcpy(&ii, &ui, sizeof(ii)); memcpy(&ii, &ui, sizeof(ii));
const float f = GGML_FP16_TO_FP32(ii); const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
table_exp_f16[i] = GGML_FP32_TO_FP16(exp(f)); table_exp_f16[i] = GGML_FP32_TO_FP16(exp(f));
} }
@ -8467,12 +8456,4 @@ int ggml_cpu_has_sse3(void) {
#endif #endif
} }
// Reports whether this build was compiled with Imath support.
// Returns 1 when GGML_USE_IMATH is defined at compile time, 0 otherwise;
// the answer is fixed at build time and does not probe the machine.
int ggml_cpu_has_imath(void) {
#if defined(GGML_USE_IMATH)
return 1;
#else
return 0;
#endif
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////

@ -732,7 +732,6 @@ int ggml_cpu_has_fp16_va(void);
int ggml_cpu_has_wasm_simd(void); int ggml_cpu_has_wasm_simd(void);
int ggml_cpu_has_blas(void); int ggml_cpu_has_blas(void);
int ggml_cpu_has_sse3(void); int ggml_cpu_has_sse3(void);
int ggml_cpu_has_imath(void);
#ifdef __cplusplus #ifdef __cplusplus
} }

@ -2582,7 +2582,6 @@ const char * whisper_print_system_info(void) {
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
s += "IMATH = " + std::to_string(ggml_cpu_has_imath()) + " | ";
return s.c_str(); return s.c_str();
} }

Loading…
Cancel
Save