From 9fe7306f4b16a974361b6a8bea370d6f5c3552f2 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 6 Dec 2022 18:48:57 +0200 Subject: [PATCH] models : add the new "large" model release by OpenAI The old "large" model is now renamed "large-v1". If you have been using it, make sure to rename it and download the new "large" model for best results. --- Makefile | 3 ++- README.md | 3 ++- examples/livestream.sh | 2 +- extra/convert-all.sh | 2 +- models/README.md | 3 ++- models/download-ggml-model.cmd | 2 +- models/download-ggml-model.sh | 2 +- tests/run-tests.sh | 4 ++-- 8 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 1224818..991f650 100644 --- a/Makefile +++ b/Makefile @@ -189,9 +189,10 @@ samples: .PHONY: small .PHONY: medium.en .PHONY: medium +.PHONY: large-v1 .PHONY: large -tiny.en tiny base.en base small.en small medium.en medium large: main +tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main bash ./models/download-ggml-model.sh $@ @echo "" @echo "===============================================" diff --git a/README.md b/README.md index 9c77782..0b9005a 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,7 @@ make small.en make small make medium.en make medium +make large-v1 make large ``` @@ -217,7 +218,7 @@ make large | base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | | small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | -| large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | +| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | ## Another example diff --git a/examples/livestream.sh b/examples/livestream.sh index 167ce0b..de279a8 100755 --- a/examples/livestream.sh +++ b/examples/livestream.sh @@ -34,7 +34,7 @@ if [ -n "$3" ]; then fi # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" ) # list available models function list_models { diff --git a/extra/convert-all.sh b/extra/convert-all.sh index 20801af..c5ba909 100755 --- a/extra/convert-all.sh +++ b/extra/convert-all.sh @@ -1,6 +1,6 @@ #!/bin/bash -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" ) for model in "${models[@]}"; do python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ diff --git a/models/README.md b/models/README.md index 42a2aaf..64ce6b3 100644 --- a/models/README.md +++ b/models/README.md @@ -37,7 +37,8 @@ https://huggingface.co/datasets/ggerganov/whisper.cpp/tree/main | small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` | | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | | medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | -| large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | +| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | +| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | ## Model files for testing purposes diff --git a/models/download-ggml-model.cmd b/models/download-ggml-model.cmd index 52fde94..a20ac80 100644 --- a/models/download-ggml-model.cmd +++ b/models/download-ggml-model.cmd @@ -7,7 +7,7 @@ popd set argc=0 for %%x in (%*) do set /A argc+=1 -set models=tiny.en tiny base.en base small.en small medium.en medium large +set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large if %argc% neq 1 ( echo. diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh index e85e04c..cf54623 100755 --- a/models/download-ggml-model.sh +++ b/models/download-ggml-model.sh @@ -22,7 +22,7 @@ function get_script_path() { models_path=$(get_script_path) # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" ) # list available models function list_models { diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 048dfbd..eda6bbe 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -19,7 +19,7 @@ cd `dirname $0` # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" ) # list available models function list_models { @@ -107,7 +107,7 @@ function run_lang() { $main -m ../models/ggml-$model.bin -f $fname_dst -l $lang -otxt 2> /dev/null - git diff --no-index --word-diff=color --word-diff-regex=. $fname_dst.txt $lang-$i-ref.txt + git diff --no-index --word-diff=color --word-diff-regex=. $lang-$i-ref.txt $fname_dst.txt i=$(($i+1)) done