From 640fd77a258307cadbee6e183f93a591975fb3a9 Mon Sep 17 00:00:00 2001 From: maekawatoshiki Date: Sun, 12 Mar 2023 19:38:25 +0900 Subject: [PATCH] Use buffering --- main.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/main.cpp b/main.cpp index f02b5dd..b5eae67 100644 --- a/main.cpp +++ b/main.cpp @@ -73,7 +73,11 @@ struct llama_model { bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); + const size_t f_buf_size = 1024*1024; + char *f_buf = (char *)malloc(f_buf_size); + auto fin = std::ifstream(fname, std::ios::binary); + fin.rdbuf()->pubsetbuf(f_buf, f_buf_size); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; @@ -311,6 +315,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str()); fin = std::ifstream(fname_part, std::ios::binary); + fin.rdbuf()->pubsetbuf(f_buf, f_buf_size); fin.seekg(file_offset); // load weights @@ -496,6 +501,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab fin.close(); } + free(f_buf); + return true; }