pull/36/merge
Jay Krell 1 year ago committed by GitHub
commit 9b80f683f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,3 +1,47 @@
# This one Makefile works with Microsoft nmake and Unix make.
# They use different conditional syntax, but each can be nested and inverted within the other.
#
# How the dual-dialect guard below is expected to work (NOTE(review): this
# relies on the parsing rules of both tools -- confirm against each of them):
#   - GNU make reads "ifdef MAKEDIR:" as a test of a variable literally named
#     "MAKEDIR:", which is not set, so it skips ahead to the plain "else".
#   - nmake has no bare "ifdef" directive, so it sees an ordinary rule line and
#     then obeys its own "!ifdef MAKEDIR" directive (MAKEDIR is an nmake
#     predefined macro).
# "all" is declared before the guard, presumably so that it stays the first
# (default) target under both tools.
all: default
ifdef MAKEDIR:
!ifdef MAKEDIR
# Windows code.
# Toolchain settings used when this file is processed by Microsoft nmake.
CXX = cl
# C++20 for designated initializers
# TODO: Detect AVX.
CXXFLAGS = /nologo /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG
# C sources are built with the same compiler driver and flags as C++ sources.
CC = $(CXX)
CFLAGS = $(CXXFLAGS)
LDFLAGS=/incremental:no
# Output-name switches; the rules append the file name directly after them.
LINK_OUT = /out:
CC_OUT = /Fo
# Object-file extension and executable suffix used in target names.
O = obj
EXE = .exe
# Delete command used by the clean rule; 2>nul discards its error output.
RM_F = del 2>nul /f
# Left empty on Windows; the Unix branch sets it to a single space.
UNIX_SPACE =
# Placed before the object files on link lines; with cl, /link marks the start
# of the options that are passed on to the linker.
START_LINK_FLAGS = /link
# The need for this is surprising but otherwise there is an access violation
# running main -h.
# NOTE(review): the caret is presumably nmake's escape character, making the
# value a single backslash -- confirm.
SLASH=^\
!else
else
# Unix code.
# Object-file extension and (empty) executable suffix used in target names.
O = o
EXE=
# UNIX_SPACE is meant to end up holding exactly one space: the quotes let the
# first assignment carry the space, and the substitution reference on the next
# line then strips the quote characters again.
UNIX_SPACE = " "
UNIX_SPACE := $(UNIX_SPACE:"=)
# Output-name switches: -o followed by that space, so that the file name the
# rules append directly afterwards becomes a separate argument.
LINK_OUT = -o$(UNIX_SPACE)
CC_OUT=-o$(UNIX_SPACE)
# Delete command used by the clean rule.
RM_F = rm -f
# Nothing needs to precede the object files on Unix link lines.
START_LINK_FLAGS=
# Path separator used when running a freshly built program from the
# current directory.
SLASH=/
# Ask uname for the kernel name unless the caller already supplied UNAME_S.
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
@ -171,28 +215,33 @@ $(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )
endif
!endif :
# Common Unix/Windows code.
default: main quantize
default: main$(EXE) quantize$(EXE)
#
# Build library
#
ggml.o: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
# Compile ggml.c into an object file. CC_OUT is the per-platform switch that
# names the object file and O is the per-platform object extension.
ggml.$O: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c $(CC_OUT)ggml.$O
utils.o: utils.cpp utils.h
$(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o
# Compile utils.cpp into an object file. CC_OUT is the per-platform switch that
# names the object file and O is the per-platform object extension.
utils.$O: utils.cpp utils.h
$(CXX) $(CXXFLAGS) -c utils.cpp $(CC_OUT)utils.$O
# Remove the object files and both programs. The RM_F form uses the
# per-platform delete command, object extension and executable suffix.
clean:
rm -f *.o main quantize
$(RM_F) *.$O main$(EXE) quantize$(EXE)
main: main.cpp ggml.o utils.o
$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
./main -h
# Link the main program, then run it once with -h as a smoke test.
# START_LINK_FLAGS (/link with cl, empty on Unix) precedes the object files and
# LINK_OUT names the executable. The smoke test uses the EXE suffix rather than
# a literal ".exe", so it runs the file that was just linked on both platforms
# (EXE is empty on Unix, where the program is plain "main").
main$(EXE): main.cpp ggml.$O utils.$O
	$(CXX) $(CXXFLAGS) main.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)main$(EXE) $(LDFLAGS)
	.$(SLASH)main$(EXE) -h
quantize: quantize.cpp ggml.o utils.o
$(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS)
# Link the quantize program. START_LINK_FLAGS (/link with cl, empty on Unix)
# precedes the object files and LINK_OUT names the executable.
quantize$(EXE): quantize.cpp ggml.$O utils.$O
$(CXX) $(CXXFLAGS) quantize.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)quantize$(EXE) $(LDFLAGS)
#
# Tests

@ -285,8 +285,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
// timing
//
#if defined(_MSC_VER) || defined(__MINGW32__)
#if _WIN32
static int64_t timer_freq;
void ggml_time_init(void) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
@ -407,8 +408,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
const int nb = k / QK;
const size_t bs = sizeof(float) + QK/2;
uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
uint8_t * restrict pd = (uint8_t *)y + 0*bs;
uint8_t * restrict pb = (uint8_t *)y + 0*bs + sizeof(float);
uint8_t pp[QK/2];
@ -654,8 +655,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
const int nb = k / QK;
const size_t bs = sizeof(float) + QK/2;
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
// scalar
for (int i = 0; i < nb; i++) {
@ -1301,11 +1302,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
const size_t bs = sizeof(float) + QK/2;
const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
const uint8_t * restrict pd0 = (const uint8_t *)x + 0*bs;
const uint8_t * restrict pd1 = (const uint8_t *)y + 0*bs;
const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
const uint8_t * restrict pb0 = (const uint8_t *)x + 0*bs + sizeof(float);
const uint8_t * restrict pb1 = (const uint8_t *)y + 0*bs + sizeof(float);
float sumf = 0.0;
@ -1731,8 +1732,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
const int nb = n / QK;
const size_t bs = sizeof(float) + QK/2;
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
#if __ARM_NEON
#if QK == 32

@ -732,7 +732,8 @@ bool llama_eval(
return true;
}
int main(int argc, char ** argv) {
int main(int argc, char ** argv) {
ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();
gpt_params params;

@ -471,8 +471,13 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
const size_t row_size = nb*bs;
assert(k % qk == 0);
uint8_t pp[qk/2];
size_t const pp_size = (qk / 2);
#if _MSC_VER
uint8_t* pp = (uint8_t*)_alloca(pp_size);
#else
uint8_t pp[pp_size];
#endif
char * pdst = (char *) dst;
@ -511,7 +516,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
pp[l/2] = vi0 | (vi1 << 4);
}
memcpy(pb, pp, sizeof(pp));
memcpy(pb, pp, pp_size);
pb += bs;
}
}
@ -526,7 +531,12 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
assert(k % qk == 0);
uint8_t pp[qk/2];
size_t const pp_size = (qk / 2);
#if _MSC_VER
uint8_t* pp = (uint8_t*)_alloca(pp_size);
#else
uint8_t pp[pp_size];
#endif
char * pdst = (char *) dst;
@ -570,7 +580,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
pp[l/2] = vi0 | (vi1 << 4);
}
memcpy(pb + i*qk/2, pp, sizeof(pp));
memcpy(pb + i*qk/2, pp, pp_size);
}
}
}

Loading…
Cancel
Save