Use sentencepiece tokenization

pull/66/head
beiller 1 year ago
parent 96dc6a0c68
commit 67b1c842d9

.gitignore vendored

@@ -21,3 +21,4 @@ models/*
 arm_neon.h
 compile_commands.json
+deps

Makefile
@@ -31,7 +31,7 @@ endif
 #
 CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
-CXXFLAGS = -I. -I../../sentencepiece/src/ -O3 -DNDEBUG -std=c++11 -fPIC
+CXXFLAGS = -I. -Ideps/sentencepiece-0.1.97/src/ -O3 -DNDEBUG -std=c++11 -fPIC
 LDFLAGS =

 # OS specific
 # TODO: support Windows
@@ -187,7 +187,7 @@ clean:
 	rm -f *.o main quantize

 main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o /Users/billhamilton/src/sentencepiece/build/src/libsentencepiece.a -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o deps/libsentencepiece.a -o main $(LDFLAGS)
 	./main -h

 quantize: quantize.cpp ggml.o utils.o
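For reference, the static library linked above is driven through sentencepiece's C++ API. Below is a minimal sketch of a tokenizer round-trip with that API, assuming the deps/ include path set in CXXFLAGS; the model path and variable names are illustrative and not taken from this commit:

#include <sentencepiece_processor.h>
#include <cstdio>
#include <string>
#include <vector>

int main() {
    sentencepiece::SentencePieceProcessor sp;
    // path is an assumption for illustration; LLaMA ships a tokenizer.model
    if (!sp.Load("models/tokenizer.model").ok()) {
        fprintf(stderr, "failed to load tokenizer model\n");
        return 1;
    }
    std::vector<int> ids;
    sp.Encode("Hello world", &ids);   // text -> token ids
    std::string text;
    sp.Decode(ids, &text);            // token ids -> text
    printf("%zu tokens -> \"%s\"\n", ids.size(), text.c_str());
    return 0;
}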

README.md
@@ -132,7 +132,7 @@ Here are the steps for the LLaMA-7B model:
 # build this repo
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
-make
+./build.sh

 # obtain the original LLaMA model weights and place them in ./models
 ls ./models

build.sh (new file)
@@ -0,0 +1,21 @@
+#!/bin/sh
+if [ ! -d deps ]
+then
+    mkdir deps
+fi
+cd deps
+if [ ! -f v0.1.97.tar.gz ]
+then
+    curl -LO https://github.com/google/sentencepiece/archive/refs/tags/v0.1.97.tar.gz
+fi
+if [ ! -f libsentencepiece.a ]
+then
+    tar xzvf v0.1.97.tar.gz
+    cd sentencepiece-0.1.97/ && rm -rf build && mkdir build && cd build && cmake ..
+    make sentencepiece-static -j $(nproc)
+    cd ../..
+    cp sentencepiece-0.1.97/build/src/libsentencepiece.a ./
+fi
+cd ..
+make
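Notes on build.sh: both the tarball and libsentencepiece.a are cached under deps/, so a re-run skips the sentencepiece build and only invokes the top-level make. Also, nproc is Linux-specific; on macOS, sysctl -n hw.ncpu is the usual substitute (the script itself does not handle this). Typical invocation from the repository root:

sh build.sh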

@@ -1,12 +0,0 @@
-#https://github.com/google/sentencepiece.git
-#9ffb33a14c97c512103be0ee74740099660b39aa
-curl -LO https://github.com/google/sentencepiece/releases/download/v0.1.97/sentencepiece-0.1.97.tar.gz
-tar xzvf sentencepiece-0.1.97.tar.gz
-cd sentencepiece-0.1.97/src
-mkdir build
-cd build
-cmake ..
-make sentencepiece-static -j $(nproc)
-cd ../..

main.cpp
@@ -855,6 +855,8 @@ int main(int argc, char ** argv) {
     printf("\n\n");

     std::vector<gpt_vocab::id> embd;
+    std::vector<gpt_vocab::id> all_tokens;
+    std::string full_text = "";

     // determine the required inference memory per token:
     size_t mem_per_token = 0;
@@ -920,6 +922,7 @@
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(id);
+                all_tokens.push_back(id);

                 t_sample_us += ggml_time_us() - t_start_sample_us;
             }
@@ -938,6 +941,7 @@
                 embd.push_back(embd_inp[input_consumed]);
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[input_consumed]);
+                all_tokens.push_back(embd_inp[input_consumed]);
                 ++input_consumed;
                 if (embd.size() > params.n_batch) {
                     break;
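The hunks above only accumulate every sampled and consumed id into all_tokens; how full_text gets filled is outside this excerpt. Presumably the whole sequence is decoded in one call, since sentencepiece encodes word boundaries as U+2581 markers that per-token decoding would mishandle. A minimal sketch under that assumption; the helper name is hypothetical, and gpt_vocab::id is assumed convertible to int:

#include <sentencepiece_processor.h>
#include <string>
#include <vector>

// Decode the accumulated ids in a single pass so the U+2581 word-boundary
// markers are turned back into spaces correctly (hypothetical helper).
static std::string decode_all(sentencepiece::SentencePieceProcessor & sp,
                              const std::vector<int> & all_tokens) {
    std::string full_text;
    sp.Decode(all_tokens, &full_text);
    return full_text;
}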
