|
|
@ -74,10 +74,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
|
|
|
printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
|
|
|
printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
|
|
|
|
|
|
|
|
|
|
|
const size_t f_buf_size = 1024*1024;
|
|
|
|
const size_t f_buf_size = 1024*1024;
|
|
|
|
char *f_buf = (char *)malloc(f_buf_size);
|
|
|
|
std::vector<char> f_buf(f_buf_size);
|
|
|
|
|
|
|
|
|
|
|
|
auto fin = std::ifstream(fname, std::ios::binary);
|
|
|
|
auto fin = std::ifstream(fname, std::ios::binary);
|
|
|
|
fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
|
|
|
|
fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
|
|
|
|
if (!fin) {
|
|
|
|
if (!fin) {
|
|
|
|
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
|
|
|
|
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
@ -315,7 +315,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
|
|
|
printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
|
|
|
|
printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
|
|
|
|
|
|
|
|
|
|
|
|
fin = std::ifstream(fname_part, std::ios::binary);
|
|
|
|
fin = std::ifstream(fname_part, std::ios::binary);
|
|
|
|
fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
|
|
|
|
fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
|
|
|
|
fin.seekg(file_offset);
|
|
|
|
fin.seekg(file_offset);
|
|
|
|
|
|
|
|
|
|
|
|
// load weights
|
|
|
|
// load weights
|
|
|
@ -501,8 +501,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
|
|
|
fin.close();
|
|
|
|
fin.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
free(f_buf);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|