From 96dc6a0c6859549f996bf32067ed22fc30610547 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 01/10] work towards tokenizer integration

---
 Makefile      |  7 +++----
 build_deps.sh | 12 ++++++++++++
 main.cpp      | 34 ++++++++++++++++++++++++++--------
 3 files changed, 41 insertions(+), 12 deletions(-)
 create mode 100644 build_deps.sh
diff --git a/Makefile b/Makefile
index 8388c29..0b8464c 100644
--- a/Makefile
+++ b/Makefile
@@ -31,9 +31,8 @@ endif
 #
 
 CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
-LDFLAGS  =
-
+CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  = 
 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
@@ -188,7 +187,7 @@ clean:
 	rm -f *.o main quantize
 
 main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS)
 	./main -h
 
 quantize: quantize.cpp ggml.o utils.o
diff --git a/build_deps.sh b/build_deps.sh
new file mode 100644
index 0000000..444d207
--- /dev/null
+++ b/build_deps.sh
@@ -0,0 +1,12 @@
+#https://github.com/google/sentencepiece.git
+#9ffb33a14c97c512103be0ee74740099660b39aa
+
+curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz
+tar xzvf sentencepiece-0.1.97.tar.gz
+cd sentencepiece-0.1.97/src
+mkdir build
+cd build
+cmake ..
+make sentencepiece-static -j $(nproc)
+cd ../..
+
diff --git a/main.cpp b/main.cpp
index 387d35f..7490569 100644
--- a/main.cpp
+++ b/main.cpp
@@ -14,6 +14,12 @@
 #include <signal.h>
 #include <unistd.h>
 
+#include <sentencepiece_processor.h>
+
+
+//Tokenizer object
+sentencepiece::SentencePieceProcessor processor;
+
 #define ANSI_COLOR_RED     "\x1b[31m"
 #define ANSI_COLOR_GREEN   "\x1b[32m"
 #define ANSI_COLOR_YELLOW  "\x1b[33m"
@@ -758,6 +764,11 @@ void sigint_handler(int signo) {
 }
 
 int main(int argc, char ** argv) {
+    const auto status = processor.Load("models/tokenizer.model");
+    if (!status.ok()) {
+        fprintf(stderr, "%s: failed to load tokenizer: %s\n", __func__, status.ToString().c_str());
+        return 1; // the tokenizer is required for every later Encode/Decode call
+    }
     ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
 
@@ -807,7 +818,8 @@ int main(int argc, char ** argv) {
     std::vector<float> logits;
 
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp;
+    processor.Encode(params.prompt, &embd_inp);
 
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
@@ -935,14 +947,20 @@ int main(int argc, char ** argv) {
 
         // display text
         if (!input_noecho) {
-            for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
-            }
-            // reset color to default if we there is no pending user input
-            if (params.use_color && embd_inp.size() <= input_consumed) {
-                printf(ANSI_COLOR_RESET);
+            std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
+            if(check != "�") {  // ensure a multi-byte token is finished generating before outputting the text
+                std::string text;
+                processor.Decode(all_tokens, &text);
+                std::string chunk = text.substr(full_text.length());
+                printf("%s", chunk.c_str());
+                full_text += chunk;
+
+                // reset color to default if we there is no pending user input
+                if (params.use_color && embd_inp.size() <= input_consumed) {
+                    printf(ANSI_COLOR_RESET);
+                }
+                fflush(stdout);
             }
-            fflush(stdout);
         }
 
         // in interactive mode, and not currently processing queued inputs;

From 67b1c842d958b96deb20080f75a681ba045b482f Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 02/10] Use sentencepiece tokenization

---
 .gitignore    |  1 +
 Makefile      |  4 ++--
 README.md     |  2 +-
 build.sh      | 21 +++++++++++++++++++++
 build_deps.sh | 12 ------------
 main.cpp      |  4 ++++
 6 files changed, 29 insertions(+), 15 deletions(-)
 create mode 100755 build.sh
 delete mode 100644 build_deps.sh

diff --git a/.gitignore b/.gitignore
index 5eb1ff1..57256fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ models/*
 
 arm_neon.h
 compile_commands.json
+deps
diff --git a/Makefile b/Makefile
index 0b8464c..57a14bc 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ endif
 #
 
 CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++11 -fPIC
 LDFLAGS  = 
 # OS specific
 # TODO: support Windows
@@ -187,7 +187,7 @@ clean:
 	rm -f *.o main quantize
 
 main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o deps/libsentencepiece.a -o main $(LDFLAGS)
 	./main -h
 
 quantize: quantize.cpp ggml.o utils.o
diff --git a/README.md b/README.md
index dd3efae..1f211a0 100644
--- a/README.md
+++ b/README.md
@@ -132,7 +132,7 @@ Here are the step for the LLaMA-7B model:
 # build this repo
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
-make
+./build.sh
 
 # obtain the original LLaMA model weights and place them in ./models
 ls ./models
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..1f9c004
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+if [ ! -d deps ]
+then
+    mkdir deps
+fi
+cd deps
+if [ ! -f v0.1.97.tar.gz ]
+then
+    curl -LO https://github.com/google/sentencepiece/archive/refs/tags/v0.1.97.tar.gz
+fi
+if [ ! -f libsentencepiece.a ]
+then
+    tar xzvf v0.1.97.tar.gz
+    cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build && cmake ..
+    make sentencepiece-static -j $(nproc)
+    cd ../..
+    cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./
+fi
+cd ..
+make
diff --git a/build_deps.sh b/build_deps.sh
deleted file mode 100644
index 444d207..0000000
--- a/build_deps.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#https://github.com/google/sentencepiece.git
-#9ffb33a14c97c512103be0ee74740099660b39aa
-
-curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz
-tar xzvf sentencepiece-0.1.97.tar.gz
-cd sentencepiece-0.1.97/src
-mkdir build
-cd build
-cmake ..
-make sentencepiece-static -j $(nproc)
-cd ../..
-
diff --git a/main.cpp b/main.cpp
index 7490569..b78b846 100644
--- a/main.cpp
+++ b/main.cpp
@@ -855,6 +855,8 @@ int main(int argc, char ** argv) {
     printf("\n\n");
 
     std::vector<gpt_vocab::id> embd;
+    std::vector<gpt_vocab::id> all_tokens;
+    std::string full_text = "";
 
     // determine the required inference memory per token:
     size_t mem_per_token = 0;
@@ -920,6 +922,7 @@ int main(int argc, char ** argv) {
 
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(id);
+                all_tokens.push_back(id);
 
                 t_sample_us += ggml_time_us() - t_start_sample_us;
             }
@@ -938,6 +941,7 @@ int main(int argc, char ** argv) {
                 embd.push_back(embd_inp[input_consumed]);
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[input_consumed]);
+                all_tokens.push_back(embd_inp[input_consumed]);
                 ++input_consumed;
                 if (embd.size() > params.n_batch) {
                     break;

From 7deae8a2ca057029a7b3aaab43e100b3b9dbaff0 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 03/10] fix build procedure

---
 .github/workflows/build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c10e671..d2802f3 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ jobs:
 
       - name: Build
         run: |
-          make
+          build.sh
 
   macOS-latest:
     runs-on: macOS-latest
@@ -31,7 +31,7 @@ jobs:
 
       - name: Build
         run: |
-          make
+          build.sh
 
 #  ubuntu-latest-gcc:
 #    runs-on: ubuntu-latest

From 3c04dfb436f86dfaa8be45d6cedaac4ea1aee481 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 04/10] run build in shell

---
 .github/workflows/build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d2802f3..7c10638 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ jobs:
 
       - name: Build
         run: |
-          build.sh
+          sh build.sh
 
   macOS-latest:
     runs-on: macOS-latest
@@ -31,7 +31,7 @@ jobs:
 
       - name: Build
         run: |
-          build.sh
+          sh build.sh
 
 #  ubuntu-latest-gcc:
 #    runs-on: ubuntu-latest

From 3e2327c96a8adf355614dbef5f523e98d014bcaf Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 05/10] Try manually adding CXX flag

---
 build.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/build.sh b/build.sh
index 1f9c004..0e61ba2 100755
--- a/build.sh
+++ b/build.sh
@@ -12,7 +12,8 @@ fi
 if [ ! -f libsentencepiece.a ]
 then
     tar xzvf v0.1.97.tar.gz
-    cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build && cmake ..
+    cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build
+    cmake -E env CXXFLAGS="-std=c++17" cmake ..
     make sentencepiece-static -j $(nproc)
     cd ../..
     cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./

From 07771aab813a065e5a0bced194588bc3e0593c3d Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 06/10] ensure cmake is proper version

---
 .github/workflows/build.yml | 2 +-
 build.sh                    | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7c10638..2eccb30 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential
+          sudo apt-get install build-essential cmake
 
       - name: Build
         run: |
diff --git a/build.sh b/build.sh
index 0e61ba2..500366d 100755
--- a/build.sh
+++ b/build.sh
@@ -13,7 +13,8 @@ if [ ! -f libsentencepiece.a ]
 then
     tar xzvf v0.1.97.tar.gz
     cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build
-    cmake -E env CXXFLAGS="-std=c++17" cmake ..
+    cmake --version
+    cmake ..
     make sentencepiece-static -j $(nproc)
     cd ../..
     cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./

From ee36313770f1a9ca04618d1e7677d5c428326442 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 07/10] Ah -std=c++17 is needed

---
 .github/workflows/build.yml | 2 +-
 Makefile                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2eccb30..7c10638 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential cmake
+          sudo apt-get install build-essential
 
       - name: Build
         run: |
diff --git a/Makefile b/Makefile
index 57a14bc..05a2039 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ endif
 #
 
 CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++17 -fPIC
 LDFLAGS  = 
 # OS specific
 # TODO: support Windows

From 703571861fe789cdb4274713278630f6835d91b5 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 08/10] undo complicated printing until it's fixed sadly

---
 main.cpp | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/main.cpp b/main.cpp
index b78b846..333ea2d 100644
--- a/main.cpp
+++ b/main.cpp
@@ -951,20 +951,29 @@ int main(int argc, char ** argv) {
 
         // display text
         if (!input_noecho) {
-            std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
-            if(check != "�") {  // ensure a multi-byte token is finished generating before outputting the text
-                std::string text;
-                processor.Decode(all_tokens, &text);
-                std::string chunk = text.substr(full_text.length());
-                printf("%s", chunk.c_str());
-                full_text += chunk;
-
-                // reset color to default if we there is no pending user input
-                if (params.use_color && embd_inp.size() <= input_consumed) {
-                    printf(ANSI_COLOR_RESET);
-                }
-                fflush(stdout);
-            }
+            
+            // std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
+            // printf("[%s]", check.c_str());
+            // if(check != "�") {  // ensure a multi-byte token is finished generating before outputting the text
+            //     std::string text;
+            //     processor.Decode(all_tokens, &text);
+            //     std::string chunk = text.substr(full_text.length());
+            //     printf("%s", chunk.c_str());
+            //     full_text.reserve (text.size());
+            //     full_text += chunk;
+
+            //     // reset color to default if we there is no pending user input
+            //     if (params.use_color && embd_inp.size() <= input_consumed) {
+            //         printf(ANSI_COLOR_RESET);
+            //     }
+            //     fflush(stdout);
+            // }
+
+            // The code above crashes and is WIP any help appreciated
+            std::string text;
+            processor.Decode(all_tokens, &text);
+            printf("%s\n", text.c_str());
+            fflush(stdout);
         }
 
         // in interactive mode, and not currently processing queued inputs;

From 9425a21db54a7b8ad47c19416b50ec0b3be6cad9 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 09/10] Bugfix and back to printing as normal; fix antiprompt

---
 main.cpp | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/main.cpp b/main.cpp
index 333ea2d..36d6ddc 100644
--- a/main.cpp
+++ b/main.cpp
@@ -824,7 +824,8 @@ int main(int argc, char ** argv) {
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
     // tokenize the reverse prompt
-    std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);
+    std::vector<gpt_vocab::id> antiprompt_inp;
+    processor.Encode(params.antiprompt, &antiprompt_inp);
 
     printf("\n");
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
@@ -951,29 +952,28 @@ int main(int argc, char ** argv) {
 
         // display text
         if (!input_noecho) {
-            
-            // std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
-            // printf("[%s]", check.c_str());
-            // if(check != "�") {  // ensure a multi-byte token is finished generating before outputting the text
-            //     std::string text;
-            //     processor.Decode(all_tokens, &text);
-            //     std::string chunk = text.substr(full_text.length());
-            //     printf("%s", chunk.c_str());
-            //     full_text.reserve (text.size());
-            //     full_text += chunk;
-
-            //     // reset color to default if we there is no pending user input
-            //     if (params.use_color && embd_inp.size() <= input_consumed) {
-            //         printf(ANSI_COLOR_RESET);
-            //     }
-            //     fflush(stdout);
-            // }
-
-            // The code above crashes and is WIP any help appreciated
-            std::string text;
-            processor.Decode(all_tokens, &text);
-            printf("%s\n", text.c_str());
-            fflush(stdout);
+            // check if last token is unprintable token
+            std::string check;
+            std::vector<gpt_vocab::id> check_token;
+            check_token.push_back(all_tokens.at(all_tokens.size()-1));
+            processor.Decode(check_token, &check);
+            if(check != "�") { 
+                // If the token is printable we wont attempt to print unprintable tokens
+                std::string text;
+                processor.Decode(all_tokens, &text);
+                if(full_text.length() < text.length()) {
+                    std::string chunk = text.substr(full_text.length());
+                    printf("%s", chunk.c_str());
+                    // text is the decode of all tokens so far; keep it as the already-printed prefix
+                    full_text = text;
+                    // reset color to default if there is no pending user input
+                    if (params.use_color && embd_inp.size() <= input_consumed) {
+                        printf(ANSI_COLOR_RESET);
+                    }
+                    fflush(stdout);
+                }
+
+            }
         }
 
         // in interactive mode, and not currently processing queued inputs;

From ce7ebb33198c0e99399de567e367bb2a63274d04 Mon Sep 17 00:00:00 2001
From: beiller <beiller@gmail.com>
Date: Wed, 8 Mar 2023 16:44:50 -0500
Subject: [PATCH 10/10] Another antiprompt fix

---
 main.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 36d6ddc..98abcc0 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1004,7 +1004,8 @@ int main(int argc, char ** argv) {
                         buf[n_read+1] = 0;
                     }
 
-                    std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
+                    std::vector<gpt_vocab::id> line_inp;
+                    processor.Encode(buf, &line_inp); // tokenize the user's line; must not overwrite antiprompt_inp
                     embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
 
                     input_noecho = true; // do not echo this again