|
|
|
@ -457,7 +457,6 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
|
|
|
|
|
float min = FLT_MAX;
|
|
|
|
|
float max = -FLT_MAX;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
for (int l = 0; l < QK; l++) {
|
|
|
|
|
const float v = x[i*QK + l];
|
|
|
|
|
if (v < min) min = v;
|
|
|
|
@ -485,9 +484,9 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
|
|
|
|
|
|
|
|
|
|
memcpy(pb + i*QK/2, pp, sizeof(pp));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO: vectorize
|
|
|
|
|
void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
|
|
|
|
|
assert(k % QK == 0);
|
|
|
|
|
|
|
|
|
@ -496,6 +495,7 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
|
|
|
|
|
const float * restrict pd = (const float *) (x);
|
|
|
|
|
const uint8_t * restrict pb = (const uint8_t *) (pd + nb);
|
|
|
|
|
|
|
|
|
|
// scalar
|
|
|
|
|
for (int i = 0; i < nb; i++) {
|
|
|
|
|
const float d = pd[i];
|
|
|
|
|
|
|
|
|
@ -515,7 +515,6 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
|
|
|
|
|
|
|
|
|
|
assert(!isnan(y[i*QK + l + 0]));
|
|
|
|
|
assert(!isnan(y[i*QK + l + 1]));
|
|
|
|
|
//printf("v0 %f v1 %f, i = %d, l = %d, d = %f, vi = %d, vi0 = %d, vi1 = %d\n", v0, v1, i, l, d, vi, vi0, vi1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -549,7 +548,6 @@ void dequantize_row_q4_1(const void * restrict x, float * restrict y, int k) {
|
|
|
|
|
|
|
|
|
|
assert(!isnan(y[i*QK + l + 0]));
|
|
|
|
|
assert(!isnan(y[i*QK + l + 1]));
|
|
|
|
|
//printf("v0 %f v1 %f, i = %d, l = %d, d = %f, vi = %d, vi0 = %d, vi1 = %d\n", v0, v1, i, l, d, vi, vi0, vi1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|