From 6b45e37b2b4a536f6e7c7df8f7c15d94623df2e9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 22 Oct 2022 18:17:08 +0300 Subject: [PATCH] Update README.md and finalize the whisper.wasm example --- CMakeLists.txt | 1 + Makefile | 2 +- README.md | 2 +- examples/whisper.wasm/README.md | 26 ++++++++++++++++++- examples/whisper.wasm/index-tmpl.html | 12 +++++++-- extra/convert-all.sh | 2 +- .../convert-pt-to-ggml.py | 0 7 files changed, 39 insertions(+), 6 deletions(-) rename convert-pt-to-ggml.py => models/convert-pt-to-ggml.py (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1e44a4..f485cf5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -124,6 +124,7 @@ else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1") else() if (EMSCRIPTEN) + # we require support for WASM SIMD 128-bit set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") else() diff --git a/Makefile b/Makefile index 931e2d1..8f8cbbe 100644 --- a/Makefile +++ b/Makefile @@ -90,7 +90,7 @@ libwhisper.a: ggml.o whisper.o ar rcs libwhisper.a ggml.o whisper.o clean: - rm -f *.o main libwhisper.a + rm -f *.o main stream libwhisper.a # # Examples diff --git a/README.md b/README.md index 6991278..10c0e3c 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ You can download the converted models using the [download-ggml-model.sh](downloa https://ggml.ggerganov.com -For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models). +For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README in [models](models). 
## Bindings diff --git a/examples/whisper.wasm/README.md b/examples/whisper.wasm/README.md index a1178cd..645bab8 100644 --- a/examples/whisper.wasm/README.md +++ b/examples/whisper.wasm/README.md @@ -1,3 +1,27 @@ # whisper.wasm -Live demo: https://whisper.ggerganov.com +Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser + +This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) +implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer - +it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3 +real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe 60 seconds of audio in about +20-30 seconds). + +This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make +sure that [your browser supports them](https://webassembly.org/roadmap/). + +The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and +performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both +transcription and translation are supported. + +Since the model data is quite big (74MB for the `tiny` model) you need to manually load the model into the web-page. + +The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the +audio is limited to 120 seconds. + +## Live demo + +Link: https://whisper.ggerganov.com + +![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png) diff --git a/examples/whisper.wasm/index-tmpl.html b/examples/whisper.wasm/index-tmpl.html index 1f340e5..0e0d0ff 100644 --- a/examples/whisper.wasm/index-tmpl.html +++ b/examples/whisper.wasm/index-tmpl.html @@ -162,7 +162,7 @@ -

+
@@ -254,6 +254,10 @@ return new type(buffer); } + // + // load model + // + function loadFile(event, fname) { var file = event.target.files[0] || null; if (file == null) { @@ -281,6 +285,10 @@ reader.readAsArrayBuffer(file); } + // + // audio file + // + function loadAudio(event) { if (!context) { context = new AudioContext({sampleRate: 16000}); @@ -327,7 +335,7 @@ } // - // Microphone + // microphone // var mediaRecorder = null; diff --git a/extra/convert-all.sh b/extra/convert-all.sh index 37a8721..20801af 100755 --- a/extra/convert-all.sh +++ b/extra/convert-all.sh @@ -3,6 +3,6 @@ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" ) for model in "${models[@]}"; do - python3 convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ + python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ mv -v models/ggml-model.bin models/ggml-$model.bin done diff --git a/convert-pt-to-ggml.py b/models/convert-pt-to-ggml.py similarity index 100% rename from convert-pt-to-ggml.py rename to models/convert-pt-to-ggml.py