From 4e887dc350a17ab8077e8b58e9555b084b48bc8e Mon Sep 17 00:00:00 2001 From: Joonas Pihlajamaa Date: Sun, 23 Oct 2022 11:55:01 +0300 Subject: [PATCH] Add encoding parameter to vocab.json opening to fix errors --- models/convert-pt-to-ggml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/convert-pt-to-ggml.py b/models/convert-pt-to-ggml.py index 9e9b2dc..04792d7 100644 --- a/models/convert-pt-to-ggml.py +++ b/models/convert-pt-to-ggml.py @@ -234,7 +234,7 @@ dir_tokenizer = tokenizer.name_or_path # output in the same directory as the model fname_out = dir_out + "/ggml-model.bin" -with open(dir_tokenizer + "/vocab.json", "r") as f: +with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f: tokens = json.load(f) # use 16-bit or 32-bit floats