|
|
|
@ -285,8 +285,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
|
|
|
|
|
// timing
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
|
|
|
#if _WIN32
|
|
|
|
|
static int64_t timer_freq;
|
|
|
|
|
|
|
|
|
|
void ggml_time_init(void) {
|
|
|
|
|
LARGE_INTEGER frequency;
|
|
|
|
|
QueryPerformanceFrequency(&frequency);
|
|
|
|
@ -407,8 +408,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|
|
|
|
const int nb = k / QK;
|
|
|
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
|
|
|
|
|
|
|
|
uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
|
|
|
|
|
uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
|
|
|
|
|
uint8_t * restrict pd = (uint8_t *)y + 0*bs;
|
|
|
|
|
uint8_t * restrict pb = (uint8_t *)y + 0*bs + sizeof(float);
|
|
|
|
|
|
|
|
|
|
uint8_t pp[QK/2];
|
|
|
|
|
|
|
|
|
@ -654,8 +655,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
|
|
|
|
|
const int nb = k / QK;
|
|
|
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
|
|
|
|
|
|
|
|
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
|
|
|
|
|
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
|
|
|
|
|
const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
|
|
|
|
|
const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
|
|
|
|
|
|
|
|
|
|
// scalar
|
|
|
|
|
for (int i = 0; i < nb; i++) {
|
|
|
|
@ -1301,11 +1302,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
|
|
|
|
|
|
|
|
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
|
|
|
|
|
|
|
|
const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
|
|
|
|
|
const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
|
|
|
|
|
const uint8_t * restrict pd0 = (const uint8_t *)x + 0*bs;
|
|
|
|
|
const uint8_t * restrict pd1 = (const uint8_t *)y + 0*bs;
|
|
|
|
|
|
|
|
|
|
const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
|
|
|
|
|
const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
|
|
|
|
|
const uint8_t * restrict pb0 = (const uint8_t *)x + 0*bs + sizeof(float);
|
|
|
|
|
const uint8_t * restrict pb1 = (const uint8_t *)y + 0*bs + sizeof(float);
|
|
|
|
|
|
|
|
|
|
float sumf = 0.0;
|
|
|
|
|
|
|
|
|
@ -1731,8 +1732,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
|
|
|
|
|
const int nb = n / QK;
|
|
|
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
|
|
|
|
|
|
|
|
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
|
|
|
|
|
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
|
|
|
|
|
const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
|
|
|
|
|
const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
|
|
|
|
|
|
|
|
|
|
#if __ARM_NEON
|
|
|
|
|
#if QK == 32
|
|
|
|
|