From f66ac6dc4fcb38a3fafe2f17b086f00d2d71f7a2 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 13 Dec 2022 23:09:01 +0200
Subject: [PATCH] ggml : fix indentation

---
 ggml.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/ggml.c b/ggml.c
index 08c2a17..e3c0db3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -140,9 +140,6 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
 #include
 #endif
-// FP16 <-> FP32
-// ref: https://github.com/Maratyszcza/FP16
-
 #ifdef __F16C__
 float ggml_fp16_to_fp32(ggml_fp16_t h) {
     return _cvtsh_ss(h);
 }
@@ -156,6 +153,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) {
 
 #else
 
+// FP16 <-> FP32
+// ref: https://github.com/Maratyszcza/FP16
+
 static inline float fp32_from_bits(uint32_t w) {
     union {
         uint32_t as_bits;
@@ -439,10 +439,10 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float
         y2 = _mm256_loadu_ps(y + i + 16);
         y3 = _mm256_loadu_ps(y + i + 24);
 
-            sum0 = _mm256_add_ps(_mm256_mul_ps(x0, y0), sum0);
-            sum1 = _mm256_add_ps(_mm256_mul_ps(x1, y1), sum1);
-            sum2 = _mm256_add_ps(_mm256_mul_ps(x2, y2), sum2);
-            sum3 = _mm256_add_ps(_mm256_mul_ps(x3, y3), sum3);
+        sum0 = _mm256_add_ps(_mm256_mul_ps(x0, y0), sum0);
+        sum1 = _mm256_add_ps(_mm256_mul_ps(x1, y1), sum1);
+        sum2 = _mm256_add_ps(_mm256_mul_ps(x2, y2), sum2);
+        sum3 = _mm256_add_ps(_mm256_mul_ps(x3, y3), sum3);
     }
 
     sum0 = _mm256_add_ps(sum0, sum1);
@@ -680,10 +680,10 @@ inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t
         y2 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i*)(y + i + 16)));
         y3 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i*)(y + i + 24)));
 
-            sum0 = _mm256_add_ps(_mm256_mul_ps(x0, y0), sum0);
-            sum1 = _mm256_add_ps(_mm256_mul_ps(x1, y1), sum1);
-            sum2 = _mm256_add_ps(_mm256_mul_ps(x2, y2), sum2);
-            sum3 = _mm256_add_ps(_mm256_mul_ps(x3, y3), sum3);
+        sum0 = _mm256_add_ps(_mm256_mul_ps(x0, y0), sum0);
+        sum1 = _mm256_add_ps(_mm256_mul_ps(x1, y1), sum1);
+        sum2 = _mm256_add_ps(_mm256_mul_ps(x2, y2), sum2);
+        sum3 = _mm256_add_ps(_mm256_mul_ps(x3, y3), sum3);
     }
 
     const __m256 sum01 = _mm256_add_ps(sum0, sum1);
@@ -849,10 +849,10 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
         y2 = _mm256_loadu_ps(y + i + 16);
         y3 = _mm256_loadu_ps(y + i + 24);
 
-            y0 = _mm256_add_ps(_mm256_mul_ps(x0, v4), y0);
-            y1 = _mm256_add_ps(_mm256_mul_ps(x1, v4), y1);
-            y2 = _mm256_add_ps(_mm256_mul_ps(x2, v4), y2);
-            y3 = _mm256_add_ps(_mm256_mul_ps(x3, v4), y3);
+        y0 = _mm256_add_ps(_mm256_mul_ps(x0, v4), y0);
+        y1 = _mm256_add_ps(_mm256_mul_ps(x1, v4), y1);
+        y2 = _mm256_add_ps(_mm256_mul_ps(x2, v4), y2);
+        y3 = _mm256_add_ps(_mm256_mul_ps(x3, v4), y3);
 
         _mm256_storeu_ps(y + i + 0, y0);
         _mm256_storeu_ps(y + i + 8, y1);
@@ -1046,10 +1046,10 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, ggml_
         x2 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i*)(x + i + 16)));
         x3 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i*)(x + i + 24)));
 
-            y0 = _mm256_add_ps(_mm256_mul_ps(x0, v8), y0);
-            y1 = _mm256_add_ps(_mm256_mul_ps(x1, v8), y1);
-            y2 = _mm256_add_ps(_mm256_mul_ps(x2, v8), y2);
-            y3 = _mm256_add_ps(_mm256_mul_ps(x3, v8), y3);
+        y0 = _mm256_add_ps(_mm256_mul_ps(x0, v8), y0);
+        y1 = _mm256_add_ps(_mm256_mul_ps(x1, v8), y1);
+        y2 = _mm256_add_ps(_mm256_mul_ps(x2, v8), y2);
+        y3 = _mm256_add_ps(_mm256_mul_ps(x3, v8), y3);
 
         _mm_storeu_si128((__m128i*)(y + i + 0 ), _mm256_cvtps_ph(y0, 0));
         _mm_storeu_si128((__m128i*)(y + i + 8 ), _mm256_cvtps_ph(y1, 0));