diff --git a/Makefile b/Makefile
index 8388c29..0b8464c 100644
--- a/Makefile
+++ b/Makefile
@@ -31,9 +31,8 @@ endif
 #
 
 CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
-LDFLAGS  =
-
+CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  =
 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
@@ -188,7 +187,7 @@ clean:
 	rm -f *.o main quantize
 
 main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS)
 	./main -h
 
 quantize: quantize.cpp ggml.o utils.o
diff --git a/build_deps.sh b/build_deps.sh
new file mode 100644
index 0000000..444d207
--- /dev/null
+++ b/build_deps.sh
@@ -0,0 +1,12 @@
+#https://github.com/google/sentencepiece.git
+#9ffb33a14c97c512103be0ee74740099660b39aa
+
+curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz
+tar xzvf sentencepiece-0.1.97.tar.gz
+cd sentencepiece-0.1.97/src
+mkdir build
+cd build
+cmake ..
+make sentencepiece-static -j $(nproc)
+cd ../..
+
diff --git a/main.cpp b/main.cpp
index 387d35f..7490569 100644
--- a/main.cpp
+++ b/main.cpp
@@ -14,6 +14,12 @@
 #include <string>
 #include <vector>
 
+#include <sentencepiece_processor.h>
+
+
+//Tokenizer object
+sentencepiece::SentencePieceProcessor processor;
+
 #define ANSI_COLOR_RED     "\x1b[31m"
 #define ANSI_COLOR_GREEN   "\x1b[32m"
 #define ANSI_COLOR_YELLOW  "\x1b[33m"
@@ -758,6 +764,11 @@ void sigint_handler(int signo) {
 }
 
 int main(int argc, char ** argv) {
+    const auto status = processor.Load("models/tokenizer.model");
+    if (!status.ok()) {
+        printf("%s", status.ToString().c_str());
+        // error
+    }
     ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
 
@@ -807,7 +818,8 @@ int main(int argc, char ** argv) {
     std::vector<float> logits;
 
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<int> embd_inp;
+    processor.Encode(params.prompt, &embd_inp);
 
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
@@ -935,14 +947,20 @@ int main(int argc, char ** argv) {
 
         // display text
         if (!input_noecho) {
-            for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
-            }
-            // reset color to default if we there is no pending user input
-            if (params.use_color && embd_inp.size() <= input_consumed) {
-                printf(ANSI_COLOR_RESET);
+            std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
+            if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text
+                std::string text;
+                processor.Decode(all_tokens, &text);
+                std::string chunk = text.substr(full_text.length());
+                printf("%s", chunk.c_str());
+                full_text += chunk;
+
+                // reset color to default if we there is no pending user input
+                if (params.use_color && embd_inp.size() <= input_consumed) {
+                    printf(ANSI_COLOR_RESET);
+                }
+                fflush(stdout);
             }
-            fflush(stdout);
         }
 
         // in interactive mode, and not currently processing queued inputs;