From 96dc6a0c6859549f996bf32067ed22fc30610547 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 01/10] work towards tokenizer integration --- Makefile | 7 +++---- build_deps.sh | 12 ++++++++++++ main.cpp | 34 ++++++++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 12 deletions(-) create mode 100644 build_deps.sh diff --git a/Makefile b/Makefile index 8388c29..0b8464c 100644 --- a/Makefile +++ b/Makefile @@ -31,9 +31,8 @@ endif # CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC -LDFLAGS = - +CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC +LDFLAGS = # OS specific # TODO: support Windows ifeq ($(UNAME_S),Linux) @@ -188,7 +187,7 @@ clean: rm -f *.o main quantize main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) + $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS) ./main -h quantize: quantize.cpp ggml.o utils.o diff --git a/build_deps.sh b/build_deps.sh new file mode 100644 index 0000000..444d207 --- /dev/null +++ b/build_deps.sh @@ -0,0 +1,12 @@ +#https://github.com/google/sentencepiece.git +#9ffb33a14c97c512103be0ee74740099660b39aa + +curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz +tar xzvf sentencepiece-0.1.97.tar.gz +cd sentencepiece-0.1.97/src +mkdir build +cd build +cmake .. +make sentencepiece-static -j $(nproc) +cd ../.. 
+ diff --git a/main.cpp b/main.cpp index 387d35f..7490569 100644 --- a/main.cpp +++ b/main.cpp @@ -14,6 +14,12 @@ #include #include +#include + + +//Tokenizer object +sentencepiece::SentencePieceProcessor processor; + #define ANSI_COLOR_RED "\x1b[31m" #define ANSI_COLOR_GREEN "\x1b[32m" #define ANSI_COLOR_YELLOW "\x1b[33m" @@ -758,6 +764,11 @@ void sigint_handler(int signo) { } int main(int argc, char ** argv) { + const auto status = processor.Load("models/tokenizer.model"); + if (!status.ok()) { + printf("%s", status.ToString().c_str()); + // error + } ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); @@ -807,7 +818,8 @@ int main(int argc, char ** argv) { std::vector logits; // tokenize the prompt - std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); + std::vector embd_inp; + processor.Encode(params.prompt, &embd_inp); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); @@ -935,14 +947,20 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { - for (auto id : embd) { - printf("%s", vocab.id_to_token[id].c_str()); - } - // reset color to default if we there is no pending user input - if (params.use_color && embd_inp.size() <= input_consumed) { - printf(ANSI_COLOR_RESET); + std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1)); + if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text + std::string text; + processor.Decode(all_tokens, &text); + std::string chunk = text.substr(full_text.length()); + printf("%s", chunk.c_str()); + full_text += chunk; + + // reset color to default if we there is no pending user input + if (params.use_color && embd_inp.size() <= input_consumed) { + printf(ANSI_COLOR_RESET); + } + fflush(stdout); } - fflush(stdout); } // in interactive mode, and not currently processing queued inputs; From 67b1c842d958b96deb20080f75a681ba045b482f Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 
Mar 2023 16:44:50 -0500 Subject: [PATCH 02/10] Use sentencepiece tokenization --- .gitignore | 1 + Makefile | 4 ++-- README.md | 2 +- build.sh | 21 +++++++++++++++++++++ build_deps.sh | 12 ------------ main.cpp | 4 ++++ 6 files changed, 29 insertions(+), 15 deletions(-) create mode 100755 build.sh delete mode 100644 build_deps.sh diff --git a/.gitignore b/.gitignore index 5eb1ff1..57256fb 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ models/* arm_neon.h compile_commands.json +deps diff --git a/Makefile b/Makefile index 0b8464c..57a14bc 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ endif # CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC +CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++11 -fPIC LDFLAGS = # OS specific # TODO: support Windows @@ -187,7 +187,7 @@ clean: rm -f *.o main quantize main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS) + $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o deps/libsentencepiece.a -o main $(LDFLAGS) ./main -h quantize: quantize.cpp ggml.o utils.o diff --git a/README.md b/README.md index dd3efae..1f211a0 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ Here are the step for the LLaMA-7B model: # build this repo git clone https://github.com/ggerganov/llama.cpp cd llama.cpp -make +./build.sh # obtain the original LLaMA model weights and place them in ./models ls ./models diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..1f9c004 --- /dev/null +++ b/build.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +if [ ! -d deps ] +then + mkdir deps +fi +cd deps +if [ ! -f v0.1.97.tar.gz ] +then + curl -LO https://github.com/google/sentencepiece/archive/refs/tags/v0.1.97.tar.gz +fi +if [ ! -f libsentencepiece.a ] +then + tar xzvf v0.1.97.tar.gz + cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build && cmake .. 
+ make sentencepiece-static -j $(nproc) + cd ../.. + cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./ +fi +cd .. +make diff --git a/build_deps.sh b/build_deps.sh deleted file mode 100644 index 444d207..0000000 --- a/build_deps.sh +++ /dev/null @@ -1,12 +0,0 @@ -#https://github.com/google/sentencepiece.git -#9ffb33a14c97c512103be0ee74740099660b39aa - -curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz -tar xzvf sentencepiece-0.1.97.tar.gz -cd sentencepiece-0.1.97/src -mkdir build -cd build -cmake .. -make sentencepiece-static -j $(nproc) -cd ../.. - diff --git a/main.cpp b/main.cpp index 7490569..b78b846 100644 --- a/main.cpp +++ b/main.cpp @@ -855,6 +855,8 @@ int main(int argc, char ** argv) { printf("\n\n"); std::vector embd; + std::vector all_tokens; + std::string full_text = ""; // determine the required inference memory per token: size_t mem_per_token = 0; @@ -920,6 +922,7 @@ int main(int argc, char ** argv) { last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(id); + all_tokens.push_back(id); t_sample_us += ggml_time_us() - t_start_sample_us; } @@ -938,6 +941,7 @@ int main(int argc, char ** argv) { embd.push_back(embd_inp[input_consumed]); last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(embd_inp[input_consumed]); + all_tokens.push_back(embd_inp[input_consumed]); ++input_consumed; if (embd.size() > params.n_batch) { break; From 7deae8a2ca057029a7b3aaab43e100b3b9dbaff0 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 03/10] fix build procedure --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c10e671..d2802f3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: - name: Build run: | - make + build.sh macOS-latest: runs-on: macOS-latest @@ -31,7 +31,7 @@ jobs: - name: 
Build run: | - make + build.sh # ubuntu-latest-gcc: # runs-on: ubuntu-latest From 3c04dfb436f86dfaa8be45d6cedaac4ea1aee481 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 04/10] run build in shell --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d2802f3..7c10638 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: - name: Build run: | - build.sh + sh build.sh macOS-latest: runs-on: macOS-latest @@ -31,7 +31,7 @@ jobs: - name: Build run: | - build.sh + sh build.sh # ubuntu-latest-gcc: # runs-on: ubuntu-latest From 3e2327c96a8adf355614dbef5f523e98d014bcaf Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 05/10] Try manually adding CXX flag --- build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 1f9c004..0e61ba2 100755 --- a/build.sh +++ b/build.sh @@ -12,7 +12,8 @@ fi if [ ! -f libsentencepiece.a ] then tar xzvf v0.1.97.tar.gz - cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build && cmake .. + cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build + cmake -E env CXXFLAGS="-std=c++17" cmake .. make sentencepiece-static -j $(nproc) cd ../.. 
cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./ From 07771aab813a065e5a0bced194588bc3e0593c3d Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 06/10] ensure cmake is proper version --- .github/workflows/build.yml | 2 +- build.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7c10638..2eccb30 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential cmake - name: Build run: | diff --git a/build.sh b/build.sh index 0e61ba2..500366d 100755 --- a/build.sh +++ b/build.sh @@ -13,7 +13,8 @@ if [ ! -f libsentencepiece.a ] then tar xzvf v0.1.97.tar.gz cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build - cmake -E env CXXFLAGS="-std=c++17" cmake .. + cmake --version + cmake .. make sentencepiece-static -j $(nproc) cd ../.. cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./ From ee36313770f1a9ca04618d1e7677d5c428326442 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 07/10] Ah -std=c++17 is needed --- .github/workflows/build.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2eccb30..7c10638 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential cmake + sudo apt-get install build-essential - name: Build run: | diff --git a/Makefile b/Makefile index 57a14bc..05a2039 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ endif # CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++11 -fPIC +CXXFLAGS = -I. 
-Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++17 -fPIC LDFLAGS = # OS specific # TODO: support Windows From 703571861fe789cdb4274713278630f6835d91b5 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 08/10] undo complicated printing until its fixed sadly --- main.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/main.cpp b/main.cpp index b78b846..333ea2d 100644 --- a/main.cpp +++ b/main.cpp @@ -951,20 +951,29 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { - std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1)); - if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text - std::string text; - processor.Decode(all_tokens, &text); - std::string chunk = text.substr(full_text.length()); - printf("%s", chunk.c_str()); - full_text += chunk; - - // reset color to default if we there is no pending user input - if (params.use_color && embd_inp.size() <= input_consumed) { - printf(ANSI_COLOR_RESET); - } - fflush(stdout); - } + + // std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1)); + // printf("[%s]", check.c_str()); + // if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text + // std::string text; + // processor.Decode(all_tokens, &text); + // std::string chunk = text.substr(full_text.length()); + // printf("%s", chunk.c_str()); + // full_text.reserve (text.size()); + // full_text += chunk; + + // // reset color to default if we there is no pending user input + // if (params.use_color && embd_inp.size() <= input_consumed) { + // printf(ANSI_COLOR_RESET); + // } + // fflush(stdout); + // } + + // The code above crashes and is WIP any help appreciated + std::string text; + processor.Decode(all_tokens, &text); + printf("%s\n", text.c_str()); + fflush(stdout); } // in interactive mode, and not currently processing 
queued inputs; From 9425a21db54a7b8ad47c19416b50ec0b3be6cad9 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 09/10] Bugfix and back to printing as normal Fix antiprompt --- main.cpp | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/main.cpp b/main.cpp index 333ea2d..36d6ddc 100644 --- a/main.cpp +++ b/main.cpp @@ -824,7 +824,8 @@ int main(int argc, char ** argv) { params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); // tokenize the reverse prompt - std::vector antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false); + std::vector antiprompt_inp; + processor.Encode(params.antiprompt, &antiprompt_inp); printf("\n"); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); @@ -951,29 +952,28 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { - - // std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1)); - // printf("[%s]", check.c_str()); - // if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text - // std::string text; - // processor.Decode(all_tokens, &text); - // std::string chunk = text.substr(full_text.length()); - // printf("%s", chunk.c_str()); - // full_text.reserve (text.size()); - // full_text += chunk; - - // // reset color to default if we there is no pending user input - // if (params.use_color && embd_inp.size() <= input_consumed) { - // printf(ANSI_COLOR_RESET); - // } - // fflush(stdout); - // } - - // The code above crashes and is WIP any help appreciated - std::string text; - processor.Decode(all_tokens, &text); - printf("%s\n", text.c_str()); - fflush(stdout); + // check if last token is unprintable token + std::string check; + std::vector check_token; + check_token.push_back(all_tokens.at(all_tokens.size()-1)); + processor.Decode(check_token, &check); + if(check != "�") { + // If the token is 
printable we won't attempt to print unprintable tokens + std::string text; + processor.Decode(all_tokens, &text); + if(full_text.length() < text.length()) { + std::string chunk = text.substr(full_text.length()); + printf("%s", chunk.c_str()); + full_text.clear(); + processor.Decode(all_tokens, &full_text); + // reset color to default if there is no pending user input + if (params.use_color && embd_inp.size() <= input_consumed) { + printf(ANSI_COLOR_RESET); + } + fflush(stdout); + } + + } } // in interactive mode, and not currently processing queued inputs; From ce7ebb33198c0e99399de567e367bb2a63274d04 Mon Sep 17 00:00:00 2001 From: beiller Date: Wed, 8 Mar 2023 16:44:50 -0500 Subject: [PATCH 10/10] Another antiprompt fix --- main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 36d6ddc..98abcc0 100644 --- a/main.cpp +++ b/main.cpp @@ -1004,7 +1004,8 @@ int main(int argc, char ** argv) { buf[n_read+1] = 0; } - std::vector line_inp = ::llama_tokenize(vocab, buf, false); + std::vector line_inp; + processor.Encode(buf, &line_inp); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); input_noecho = true; // do not echo this again