diff --git a/Makefile b/Makefile
index 8388c29..0b8464c 100644
--- a/Makefile
+++ b/Makefile
@@ -31,9 +31,8 @@ endif
 #
 
 CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
-LDFLAGS  =
-
+CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  =
 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
@@ -188,7 +187,7 @@ clean:
 	rm -f *.o main quantize
 
 main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS)
 	./main -h
 
 quantize: quantize.cpp ggml.o utils.o
diff --git a/build_deps.sh b/build_deps.sh
new file mode 100644
index 0000000..444d207
--- /dev/null
+++ b/build_deps.sh
@@ -0,0 +1,12 @@
+#https://github.com/google/sentencepiece.git
+#9ffb33a14c97c512103be0ee74740099660b39aa
+
+curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz
+tar xzvf sentencepiece-0.1.97.tar.gz
+cd sentencepiece-0.1.97/src
+mkdir build
+cd build
+cmake ..
+make sentencepiece-static -j $(nproc)
+cd ../..
+
diff --git a/main.cpp b/main.cpp
index 387d35f..7490569 100644
--- a/main.cpp
+++ b/main.cpp
@@ -14,6 +14,12 @@
 #include <string>
 #include <vector>
 
+#include <sentencepiece_processor.h>
+
+
+//Tokenizer object
+sentencepiece::SentencePieceProcessor processor;
+
 #define ANSI_COLOR_RED     "\x1b[31m"
 #define ANSI_COLOR_GREEN   "\x1b[32m"
 #define ANSI_COLOR_YELLOW  "\x1b[33m"
@@ -758,6 +764,11 @@ void sigint_handler(int signo) {
 }
 
 int main(int argc, char ** argv) {
+    const auto status = processor.Load("models/tokenizer.model");
+    if (!status.ok()) {
+        printf("%s", status.ToString().c_str());
+        // error
+    }
     ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
 
@@ -807,7 +818,8 @@ int main(int argc, char ** argv) {
     std::vector<float> logits;
 
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<int> embd_inp;
+    processor.Encode(params.prompt, &embd_inp);
 
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
@@ -935,14 +947,20 @@ int main(int argc, char ** argv) {
 
         // display text
         if (!input_noecho) {
-            for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
-            }
-            // reset color to default if we there is no pending user input
-            if (params.use_color && embd_inp.size() <= input_consumed) {
-                printf(ANSI_COLOR_RESET);
+            std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
+            if(check != "�") { // ensure a multi-byte token is finished generating before outputting the text
+                std::string text;
+                processor.Decode(all_tokens, &text);
+                std::string chunk = text.substr(full_text.length());
+                printf("%s", chunk.c_str());
+                full_text += chunk;
+
+                // reset color to default if we there is no pending user input
+                if (params.use_color && embd_inp.size() <= input_consumed) {
+                    printf(ANSI_COLOR_RESET);
+                }
+                fflush(stdout);
             }
-            fflush(stdout);
         }
 
         // in interactive mode, and not currently processing queued inputs;