|
|
@ -130,7 +130,19 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|
|
|
|
|
|
|
|
|
|
|
// for the big tensors, we have the option to store the data in 16-bit floats
|
|
|
|
// for the big tensors, we have the option to store the data in 16-bit floats
|
|
|
|
// in order to save memory and also to speed up the computation
|
|
|
|
// in order to save memory and also to speed up the computation
|
|
|
|
const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
|
|
|
ggml_type wtype = GGML_TYPE_COUNT;
|
|
|
|
|
|
|
|
switch (model.hparams.f16) {
|
|
|
|
|
|
|
|
case 0: wtype = GGML_TYPE_F32; break;
|
|
|
|
|
|
|
|
case 1: wtype = GGML_TYPE_F16; break;
|
|
|
|
|
|
|
|
case 2: wtype = GGML_TYPE_Q4_0; break;
|
|
|
|
|
|
|
|
case 3: wtype = GGML_TYPE_Q4_1; break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n",
|
|
|
|
|
|
|
|
__func__, fname.c_str(), model.hparams.f16);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto & ctx = model.ctx;
|
|
|
|
auto & ctx = model.ctx;
|
|
|
|
|
|
|
|
|
|
|
@ -319,9 +331,26 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
|
|
|
|
if (1) {
    // Human-readable names for the ftype codes 0..3, used only for this log line.
    static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
    const int n_ftype_str = static_cast<int>(sizeof(ftype_str)/sizeof(ftype_str[0]));

    // ftype is only range-checked further down (the switch that computes bpe),
    // so guard the table lookup here: an out-of-range value must not index
    // past the end of ftype_str before the error path gets a chance to run.
    const char * ftype_name = (ftype >= 0 && ftype < n_ftype_str) ? ftype_str[ftype] : "unknown";

    printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_name, ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
}
|
|
|
|
|
|
|
|
|
|
|
|
if (nelements*bpe != ggml_nbytes(tensor)) {
|
|
|
|
size_t bpe = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch (ftype) {
|
|
|
|
|
|
|
|
case 0: bpe = ggml_type_size(GGML_TYPE_F32); break;
|
|
|
|
|
|
|
|
case 1: bpe = ggml_type_size(GGML_TYPE_F16); break;
|
|
|
|
|
|
|
|
case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
|
|
|
|
|
|
|
|
case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
|
|
|
|
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
|
|
|
|
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
|
|
|
|
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
|
|
|
|
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
@ -329,7 +358,6 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|
|
|
|
|
|
|
|
|
|
|
fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
|
|
|
|
fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
|
|
|
|
|
|
|
|
|
|
|
|
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
|
|
|
|
|
|
|
total_size += ggml_nbytes(tensor);
|
|
|
|
total_size += ggml_nbytes(tensor);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|