@@ -47,8 +47,10 @@ def bytes_to_unicode():
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
 
-if len(sys.argv) < 2:
+if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
+    print(" ftype == 0 -> float32")
+    print(" ftype == 1 -> float16")
     sys.exit(1)
 
 # output in the same directory as the model
@@ -64,11 +66,21 @@ with open(dir_model + "/added_tokens.json", "r") as f:
 with open(dir_model + "/config.json", "r") as f:
     hparams = json.load(f)
 
-# use 16-bit or 32-bit floats
-use_f16 = True
+# possible data types
+#   ftype == 0 -> float32
+#   ftype == 1 -> float16
+#
+# map from ftype to string
+ftype_str = ["f32", "f16"]
+
+ftype = 1
 if len(sys.argv) > 2:
-    use_f16 = False
-    fname_out = sys.argv[1] + "/ggml-model-f32.bin"
+    ftype = int(sys.argv[2])
+    if ftype < 0 or ftype > 1:
+        print("Invalid ftype: " + str(ftype))
+        sys.exit(1)
+    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
+
 
 model = GPTJForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True)
 #print (model)
@@ -85,7 +97,7 @@ fout.write(struct.pack("i", hparams["n_embd"]))
 fout.write(struct.pack("i", hparams["n_head"]))
 fout.write(struct.pack("i", hparams["n_layer"]))
 fout.write(struct.pack("i", hparams["rotary_dim"]))
-fout.write(struct.pack("i", use_f16))
+fout.write(struct.pack("i", ftype))
 
 byte_encoder = bytes_to_unicode()
 byte_decoder = {v:k for k, v in byte_encoder.items()}
@@ -114,34 +126,35 @@ for name in list_vars.keys():
     n_dims = len(data.shape);
 
     # ftype == 0 -> float32, ftype == 1 -> float16
-    ftype = 0;
-    if use_f16:
+    ftype_cur = 0;
+    if ftype != 0:
         if name[-7:] == ".weight" and n_dims == 2:
             print(" Converting to float16")
             data = data.astype(np.float16)
-            ftype = 1
+            ftype_cur = 1
         else:
             print(" Converting to float32")
             data = data.astype(np.float32)
-            ftype = 0
+            ftype_cur = 0
 
     # for efficiency - transpose these matrices:
+    # (note - with latest ggml this is no longer more efficient, so disabling it)
     # "transformer.h.*.mlp.fc_in.weight"
     # "transformer.h.*.attn.out_proj.weight"
     # "transformer.h.*.attn.q_proj.weight"
     # "transformer.h.*.attn.k_proj.weight"
     # "transformer.h.*.attn.v_proj.weight"
-    if name.endswith(".mlp.fc_in.weight") or \
-       name.endswith(".attn.out_proj.weight") or \
-       name.endswith(".attn.q_proj.weight") or \
-       name.endswith(".attn.k_proj.weight") or \
-       name.endswith(".attn.v_proj.weight"):
-        print(" Transposing")
-        data = data.transpose()
+    #if name.endswith(".mlp.fc_in.weight") or \
+    #   name.endswith(".attn.out_proj.weight") or \
+    #   name.endswith(".attn.q_proj.weight") or \
+    #   name.endswith(".attn.k_proj.weight") or \
+    #   name.endswith(".attn.v_proj.weight"):
+    #    print(" Transposing")
+    #    data = data.transpose()
 
     # header
     str = name.encode('utf-8')
-    fout.write(struct.pack("iii", n_dims, len(str), ftype))
+    fout.write(struct.pack("iii", n_dims, len(str), ftype_cur))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(str);
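For reference, a minimal sketch (not part of the patch) of how the per-tensor records written above could be read back after this change. It assumes the raw tensor data follows the name bytes directly (the corresponding write call sits outside these hunks), and the helper name read_tensor_record is hypothetical:

import struct
import numpy as np

def read_tensor_record(f):
    # Layout assumed from the write side shown above:
    #   int32 n_dims, int32 name_len, int32 ftype_cur,
    #   n_dims x int32 dims (written in reverse order),
    #   name_len bytes of UTF-8 name, then the raw tensor data
    #   (float16 if ftype_cur == 1, float32 otherwise).
    head = f.read(12)
    if len(head) < 12:
        return None  # end of file
    n_dims, name_len, ftype_cur = struct.unpack("iii", head)
    dims = struct.unpack("i" * n_dims, f.read(4 * n_dims))
    name = f.read(name_len).decode("utf-8")
    dtype = np.float16 if ftype_cur == 1 else np.float32
    count = 1
    for d in dims:
        count *= d
    data = np.fromfile(f, dtype=dtype, count=count)
    return name, dims, data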