diff --git a/Makefile b/Makefile index bd09c8c..42db7c0 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifdef MAKEDIR: CXX = cl # C++20 for designated initializers # TODO: Detect AVX. -CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL +CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG CC = $(CXX) CFLAGS = $(CXXFLAGS) LDFLAGS=/incremental:no diff --git a/ggml.c b/ggml.c index d3e44b6..8288b99 100644 --- a/ggml.c +++ b/ggml.c @@ -408,8 +408,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; - uint8_t * restrict pd = (uint8_t *) ((char*)y + 0*bs); - uint8_t * restrict pb = (uint8_t *) ((char*)y + 0*bs + sizeof(float)); + uint8_t * restrict pd = (uint8_t *)y + 0*bs; + uint8_t * restrict pb = (uint8_t *)y + 0*bs + sizeof(float); uint8_t pp[QK/2]; @@ -655,8 +655,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float); // scalar for (int i = 0; i < nb; i++) { @@ -1302,11 +1302,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd0 = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pd1 = (const uint8_t *) ((char*)y + 0*bs); + const uint8_t * restrict pd0 = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pd1 = (const uint8_t *)y + 0*bs; - const uint8_t * restrict pb0 = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); - const uint8_t * restrict pb1 = (const uint8_t *) ((char*)y + 0*bs + sizeof(float)); + const uint8_t * restrict pb0 = (const uint8_t *)x + 0*bs + sizeof(float); + const uint8_t * restrict pb1 = (const uint8_t *)y + 0*bs + sizeof(float); float sumf = 0.0; @@ -1732,8 +1732,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res const int nb = n / QK; const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float); #if __ARM_NEON #if QK == 32