From 949f97a8b449b187422da982d3591139dea05b6d Mon Sep 17 00:00:00 2001 From: RyanChang Date: Mon, 17 Oct 2022 21:19:45 +0800 Subject: [PATCH] fix missing token issue --- convert-pt-to-ggml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-pt-to-ggml.py b/convert-pt-to-ggml.py index 22bd12e..9e9b2dc 100644 --- a/convert-pt-to-ggml.py +++ b/convert-pt-to-ggml.py @@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()} fout.write(struct.pack("i", len(tokens))) for key in tokens: - text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8') + text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text)