Merge branch 'ggerganov:master' into master

2 years ago · 8dc946bd81
parent 32b72c3169 b2083c5d02
commit 8dc946bd81
19 changed files with 1008 additions and 708 deletions
--- a/.github/workflows/bindings.yml
+++ b/.github/workflows/bindings.yml
@@ -3,11 +3,16 @@ on:
  push:
    paths:
      - bindings/go/**
+      - whisper.h
+  pull_request:
+    paths:
+      - bindings/go/**
+      - whisper.h

 jobs:
-    ubuntu-latest:
-      runs-on: ubuntu-latest
-      steps:
+  ubuntu-latest:
+    runs-on: ubuntu-latest
+    steps:
      - uses: actions/setup-go@v3
        with:
          go-version: '^1.19'
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,267 +1,267 @@
 name: CI
-on: [push]
+on: [push, pull_request]

 jobs:
-    ubuntu-latest:
-        runs-on: ubuntu-latest
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                  sudo apt-get update
-                  sudo apt-get install build-essential
-                  sudo apt-get install libsdl2-dev
-
-            - name: Build
-              run: |
-                make
-                make stream
-
-    macOS-latest:
-        runs-on: macOS-latest
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                  brew update
-                  brew install sdl2
-
-            - name: Build
-              run: |
-                make
-                make stream
-
-    ubuntu-latest-gcc:
-        runs-on: ubuntu-latest
-
-        strategy:
-            matrix:
-                build: [Debug, Release]
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                  sudo apt-get update
-                  sudo apt-get install build-essential
-                  sudo apt-get install cmake
-                  sudo apt-get install libsdl2-dev
-
-            - name: Configure
-              run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-
-            - name: Build
-              run: |
-                make
-                ctest -L gh --output-on-failure
-
-    ubuntu-latest-clang:
-        runs-on: ubuntu-latest
-
-        strategy:
-            matrix:
-                build: [Debug, Release]
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                  sudo apt-get update
-                  sudo apt-get install build-essential
-                  sudo apt-get install cmake
-                  sudo apt-get install libsdl2-dev
-
-            - name: Configure
-              run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
-
-            - name: Build
-              run: |
-                make
-                ctest -L gh --output-on-failure
-
-    ubuntu-latest-gcc-sanitized:
-        runs-on: ubuntu-latest
-
-        strategy:
-            matrix:
-                sanitizer: [ADDRESS, THREAD, UNDEFINED]
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                  sudo apt-get update
-                  sudo apt-get install build-essential
-                  sudo apt-get install cmake
-
-            - name: Configure
-              run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
-
-            - name: Build
-              run: |
-                make
-                ctest -L gh --output-on-failure
-
-    windows:
-        runs-on: windows-latest
-
-        strategy:
-            matrix:
-                build: [Release]
-                arch: [Win32, x64]
-                sdl2: [ON]
-                include:
-                  - arch: Win32
-                    s2arc: x86
-                  - arch: x64
-                    s2arc: x64
-                  - sdl2: ON
-                    s2ver: 2.26.0
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Add msbuild to PATH
-              uses: microsoft/setup-msbuild@v1
-
-            - name: Fetch SDL2 and set SDL2_DIR
-              if: matrix.sdl2 == 'ON'
-              run: |
-                C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
-                7z x sdl2.zip
-                echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
-            - name: Configure
-              run: >
-                cmake -S . -B ./build -A ${{ matrix.arch }}
-                -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-                -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
-
-            - name: Build
-              run: |
-                cd ./build
-                msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-
-            - name: Copy SDL2.dll
-              if: matrix.sdl2 == 'ON'
-              run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-
-            - name: Upload binaries
-              if: matrix.sdl2 == 'ON'
-              uses: actions/upload-artifact@v1
-              with:
-                name: whisper-bin-${{ matrix.arch }}
-                path: build/bin/${{ matrix.build }}
-
-    windows-blas:
-        runs-on: windows-latest
-
-        strategy:
-            matrix:
-                build: [Release]
-                arch: [Win32, x64]
-                blas: [ON]
-                sdl2: [ON]
-                include:
-                  - arch: Win32
-                    obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
-                    s2arc: x86
-                  - arch: x64
-                    obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
-                    s2arc: x64
-                  - sdl2: ON
-                    s2ver: 2.26.0
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Add msbuild to PATH
-              uses: microsoft/setup-msbuild@v1
-
-            - name: Fetch OpenBLAS
-              if: matrix.blas == 'ON'
-              run: |
-                C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
-                7z x blas.zip -oblas -y
-                copy blas/include/cblas.h .
-                copy blas/include/openblas_config.h .
-                echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
-
-            - name: Fetch SDL2 and set SDL2_DIR
-              if: matrix.sdl2 == 'ON'
-              run: |
-                C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
-                7z x sdl2.zip
-                echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
-            - name: Configure
-              run: >
-                cmake -S . -B ./build -A ${{ matrix.arch }}
-                -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-                -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
-                -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
-                -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
-
-            - name: Build
-              run: |
-                cd ./build
-                msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-
-            - name: Copy libopenblas.dll
-              if: matrix.blas == 'ON'
-              run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
-
-            - name: Copy SDL2.dll
-              if: matrix.sdl2 == 'ON'
-              run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-
-            - name: Upload binaries
-              if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
-              uses: actions/upload-artifact@v1
-              with:
-                name: whisper-blas-bin-${{ matrix.arch }}
-                path: build/bin/${{ matrix.build }}
-
-    emscripten:
-        runs-on: ubuntu-latest
-
-        strategy:
-            matrix:
-                build: [Release]
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Dependencies
-              run: |
-                wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
-                tar -xvf master.tar.gz
-                emsdk-master/emsdk update
-                emsdk-master/emsdk install latest
-                emsdk-master/emsdk activate latest
-
-            - name: Configure
-              run: echo "tmp"
-
-            - name: Build
-              run: |
-                pushd emsdk-master
-                source ./emsdk_env.sh
-                popd
-                emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-                make
+  ubuntu-latest:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install libsdl2-dev
+
+      - name: Build
+        run: |
+          make
+          make stream
+
+  macOS-latest:
+    runs-on: macOS-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          brew update
+          brew install sdl2
+
+      - name: Build
+        run: |
+          make
+          make stream
+
+  ubuntu-latest-gcc:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        build: [Debug, Release]
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake
+          sudo apt-get install libsdl2-dev
+
+      - name: Configure
+        run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+
+      - name: Build
+        run: |
+          make
+          ctest -L gh --output-on-failure
+
+  ubuntu-latest-clang:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        build: [Debug, Release]
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake
+          sudo apt-get install libsdl2-dev
+
+      - name: Configure
+        run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+
+      - name: Build
+        run: |
+          make
+          ctest -L gh --output-on-failure
+
+  ubuntu-latest-gcc-sanitized:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake
+
+      - name: Configure
+        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
+
+      - name: Build
+        run: |
+          make
+          ctest -L gh --output-on-failure
+
+  windows:
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        build: [Release]
+        arch: [Win32, x64]
+        sdl2: [ON]
+        include:
+          - arch: Win32
+            s2arc: x86
+          - arch: x64
+            s2arc: x64
+          - sdl2: ON
+            s2ver: 2.26.0
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Add msbuild to PATH
+        uses: microsoft/setup-msbuild@v1
+
+      - name: Fetch SDL2 and set SDL2_DIR
+        if: matrix.sdl2 == 'ON'
+        run: |
+          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
+          7z x sdl2.zip
+          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
+
+      - name: Configure
+        run: >
+          cmake -S . -B ./build -A ${{ matrix.arch }}
+          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
+
+      - name: Build
+        run: |
+          cd ./build
+          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+
+      - name: Copy SDL2.dll
+        if: matrix.sdl2 == 'ON'
+        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
+
+      - name: Upload binaries
+        if: matrix.sdl2 == 'ON'
+        uses: actions/upload-artifact@v1
+        with:
+          name: whisper-bin-${{ matrix.arch }}
+          path: build/bin/${{ matrix.build }}
+
+  windows-blas:
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        build: [Release]
+        arch: [Win32, x64]
+        blas: [ON]
+        sdl2: [ON]
+        include:
+          - arch: Win32
+            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
+            s2arc: x86
+          - arch: x64
+            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+            s2arc: x64
+          - sdl2: ON
+            s2ver: 2.26.0
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Add msbuild to PATH
+        uses: microsoft/setup-msbuild@v1
+
+      - name: Fetch OpenBLAS
+        if: matrix.blas == 'ON'
+        run: |
+          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
+          7z x blas.zip -oblas -y
+          copy blas/include/cblas.h .
+          copy blas/include/openblas_config.h .
+          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+
+      - name: Fetch SDL2 and set SDL2_DIR
+        if: matrix.sdl2 == 'ON'
+        run: |
+          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
+          7z x sdl2.zip
+          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
+
+      - name: Configure
+        run: >
+          cmake -S . -B ./build -A ${{ matrix.arch }}
+          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+          -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
+          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
+
+      - name: Build
+        run: |
+          cd ./build
+          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+
+      - name: Copy libopenblas.dll
+        if: matrix.blas == 'ON'
+        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+
+      - name: Copy SDL2.dll
+        if: matrix.sdl2 == 'ON'
+        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
+
+      - name: Upload binaries
+        if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
+        uses: actions/upload-artifact@v1
+        with:
+          name: whisper-blas-bin-${{ matrix.arch }}
+          path: build/bin/${{ matrix.build }}
+
+  emscripten:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        build: [Release]
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        run: |
+          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+          tar -xvf master.tar.gz
+          emsdk-master/emsdk update
+          emsdk-master/emsdk install latest
+          emsdk-master/emsdk activate latest
+
+      - name: Configure
+        run: echo "tmp"
+
+      - name: Build
+        run: |
+          pushd emsdk-master
+          source ./emsdk_env.sh
+          popd
+          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+          make
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.o
+*.a
 .cache/
 .vs/
 .vscode/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.0)

-project(whisper.cpp VERSION 1.1.1)
+project(whisper.cpp VERSION 1.2.0)

 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

-Stable: [v1.1.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.1.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.2.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

@@ -13,7 +13,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - AVX intrinsics support for x86 architectures
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
-- Low memory usage (Flash Attention + Flash Forward)
+- Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
@@ -89,35 +89,37 @@ c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o gg
 usage: ./main [options] file0.wav file1.wav ...

 options:
-  -h,       --help            [default] show this help message and exit
-  -t N,     --threads N       [4      ] number of threads to use during computation
-  -p N,     --processors N    [1      ] number of processors to use during computation
-  -ot N,    --offset-t N      [0      ] time offset in milliseconds
-  -on N,    --offset-n N      [0      ] segment index offset
-  -d  N,    --duration N      [0      ] duration of audio to process in milliseconds
-  -mc N,    --max-context N   [-1     ] maximum number of text context tokens to store
-  -ml N,    --max-len N       [0      ] maximum segment length in characters
-  -bo N,    --best-of N       [5      ] number of best candidates to keep
-  -bs N,    --beam-size N     [-1     ] beam size for beam search
-  -wt N,    --word-thold N    [0.01   ] word timestamp probability threshold
-  -et N,    --entropy-thold N [2.40   ] entropy threshold for decoder fail
-  -lpt N,   --logprob-thold N [-1.00  ] log probability threshold for decoder fail
-  -su,      --speed-up        [false  ] speed up audio by x2 (reduced accuracy)
-  -tr,      --translate       [false  ] translate from source language to english
-  -di,      --diarize         [false  ] stereo audio diarization
-  -otxt,    --output-txt      [false  ] output result in a text file
-  -ovtt,    --output-vtt      [false  ] output result in a vtt file
-  -osrt,    --output-srt      [false  ] output result in a srt file
-  -owts,    --output-words    [false  ] output script for generating karaoke video
-  -ocsv,    --output-csv      [false  ] output result in a CSV file
-  -ps,      --print-special   [false  ] print special tokens
-  -pc,      --print-colors    [false  ] print colors
-  -pp,      --print-progress  [false  ] print progress
-  -nt,      --no-timestamps   [true   ] do not print timestamps
-  -l LANG,  --language LANG   [en     ] spoken language ('auto' for auto-detect)
-            --prompt PROMPT   [       ] initial prompt
-  -m FNAME, --model FNAME     [models/ggml-base.en.bin] model path
-  -f FNAME, --file FNAME      [       ] input WAV file path
+  -h,        --help              [default] show this help message and exit
+  -t N,      --threads N         [4      ] number of threads to use during computation
+  -p N,      --processors N      [1      ] number of processors to use during computation
+  -ot N,     --offset-t N        [0      ] time offset in milliseconds
+  -on N,     --offset-n N        [0      ] segment index offset
+  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
+  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
+  -ml N,     --max-len N         [0      ] maximum segment length in characters
+  -bo N,     --best-of N         [5      ] number of best candidates to keep
+  -bs N,     --beam-size N       [-1     ] beam size for beam search
+  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
+  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
+  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
+  -su,       --speed-up          [false  ] speed up audio by x2 (reduced accuracy)
+  -tr,       --translate         [false  ] translate from source language to english
+  -di,       --diarize           [false  ] stereo audio diarization
+  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
+  -otxt,     --output-txt        [false  ] output result in a text file
+  -ovtt,     --output-vtt        [false  ] output result in a vtt file
+  -osrt,     --output-srt        [false  ] output result in a srt file
+  -owts,     --output-words      [false  ] output script for generating karaoke video
+  -ocsv,     --output-csv        [false  ] output result in a CSV file
+  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
+  -ps,       --print-special     [false  ] print special tokens
+  -pc,       --print-colors      [false  ] print colors
+  -pp,       --print-progress    [false  ] print progress
+  -nt,       --no-timestamps     [true   ] do not print timestamps
+  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
+             --prompt PROMPT     [       ] initial prompt
+  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
+  -f FNAME,  --file FNAME        [       ] input WAV file path


 bash ./models/download-ggml-model.sh base.en
@@ -137,7 +139,8 @@ Running base.en on all samples in ./samples ...
 [+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
 ----------------------------------------------

-whisper_model_load: loading model from 'models/ggml-base.en.bin'
+whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
+whisper_model_load: loading model
 whisper_model_load: n_vocab       = 51864
 whisper_model_load: n_audio_ctx   = 1500
 whisper_model_load: n_audio_state = 512
@@ -150,13 +153,14 @@ whisper_model_load: n_text_layer  = 6
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 2
+whisper_model_load: mem required  =  215.00 MB (+    6.00 MB per decoder)
+whisper_model_load: kv self size  =    5.25 MB
+whisper_model_load: kv cross size =   17.58 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: mem_required  =  506.00 MB
-whisper_model_load: ggml ctx size =  140.60 MB
-whisper_model_load: memory size   =   22.83 MB
+whisper_model_load: model ctx     =  140.60 MB
 whisper_model_load: model size    =  140.54 MB

-system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
+system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |

 main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

@@ -164,12 +168,13 @@ main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 proc
 [00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.


-whisper_print_timings:     load time =   105.91 ms
-whisper_print_timings:      mel time =    24.62 ms
-whisper_print_timings:   sample time =     3.63 ms
-whisper_print_timings:   encode time =   324.71 ms / 54.12 ms per layer
-whisper_print_timings:   decode time =    83.58 ms / 13.93 ms per layer
-whisper_print_timings:    total time =   542.81 ms
+whisper_print_timings:     fallbacks =   0 p /   0 h
+whisper_print_timings:     load time =   113.81 ms
+whisper_print_timings:      mel time =    15.40 ms
+whisper_print_timings:   sample time =    11.58 ms /    27 runs (    0.43 ms per run)
+whisper_print_timings:   encode time =   266.60 ms /     1 runs (  266.60 ms per run)
+whisper_print_timings:   decode time =    66.11 ms /    27 runs (    2.45 ms per run)
+whisper_print_timings:    total time =   476.31 ms
 ```

 The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
@@ -212,11 +217,11 @@ make large

 | Model  | Disk   | Mem     | SHA                                        |
 | ---    | ---    | ---     | ---                                        |
-| tiny   |  75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| base   | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| small  | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large  | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| tiny   |  75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| base   | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| small  | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| large  | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |

 ## Limitations

@@ -234,7 +239,8 @@ in about half a minute on a MacBook M1 Pro, using `medium.en` model:
 ```java
 $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8

-whisper_model_load: loading model from 'models/ggml-medium.en.bin'
+whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
+whisper_model_load: loading model
 whisper_model_load: n_vocab       = 51864
 whisper_model_load: n_audio_ctx   = 1500
 whisper_model_load: n_audio_state = 1024
@@ -247,55 +253,60 @@ whisper_model_load: n_text_layer  = 24
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 4
-whisper_model_load: mem_required  = 2610.00 MB
+whisper_model_load: mem required  = 1720.00 MB (+   43.00 MB per decoder)
+whisper_model_load: kv self size  =   42.00 MB
+whisper_model_load: kv cross size =  140.62 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: ggml ctx size = 1644.97 MB
-whisper_model_load: memory size =   182.62 MB
-whisper_model_load: model size  =  1462.12 MB
-
-main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00.000 --> 00:08.000]   My fellow Americans, this day has brought terrible news and great sadness to our country.
-[00:08.000 --> 00:17.000]   At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
-[00:17.000 --> 00:23.000]   A short time later, debris was seen falling from the skies above Texas.
-[00:23.000 --> 00:29.000]   The Columbia's lost. There are no survivors.
-[00:29.000 --> 00:32.000]   On board was a crew of seven.
-[00:32.000 --> 00:39.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
-[00:39.000 --> 00:48.000]   Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
-[00:48.000 --> 00:52.000]   a colonel in the Israeli Air Force.
-[00:52.000 --> 00:58.000]   These men and women assumed great risk in the service to all humanity.
-[00:58.000 --> 01:03.000]   In an age when space flight has come to seem almost routine,
-[01:03.000 --> 01:07.000]   it is easy to overlook the dangers of travel by rocket
-[01:07.000 --> 01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
-[01:12.000 --> 01:18.000]   These astronauts knew the dangers, and they faced them willingly,
-[01:18.000 --> 01:23.000]   knowing they had a high and noble purpose in life.
-[01:23.000 --> 01:31.000]   Because of their courage and daring and idealism, we will miss them all the more.
-[01:31.000 --> 01:36.000]   All Americans today are thinking as well of the families of these men and women
-[01:36.000 --> 01:40.000]   who have been given this sudden shock and grief.
-[01:40.000 --> 01:45.000]   You're not alone. Our entire nation grieves with you,
-[01:45.000 --> 01:52.000]   and those you love will always have the respect and gratitude of this country.
-[01:52.000 --> 01:56.000]   The cause in which they died will continue.
-[01:56.000 --> 02:04.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery
-[02:04.000 --> 02:11.000]   and the longing to understand. Our journey into space will go on.
-[02:11.000 --> 02:16.000]   In the skies today, we saw destruction and tragedy.
-[02:16.000 --> 02:22.000]   Yet farther than we can see, there is comfort and hope.
-[02:22.000 --> 02:29.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
-[02:29.000 --> 02:35.000]   who created all these. He who brings out the starry hosts one by one
-[02:35.000 --> 02:39.000]   and calls them each by name."
-[02:39.000 --> 02:46.000]   Because of His great power and mighty strength, not one of them is missing.
-[02:46.000 --> 02:55.000]   The same Creator who names the stars also knows the names of the seven souls we mourn today.
-[02:55.000 --> 03:01.000]   The crew of the shuttle Columbia did not return safely to earth,
-[03:01.000 --> 03:05.000]   yet we can pray that all are safely home.
-[03:05.000 --> 03:13.000]   May God bless the grieving families, and may God continue to bless America.
-[03:13.000 --> 03:41.000]   Audio
-
-
-whisper_print_timings:     load time =   575.92 ms
-whisper_print_timings:      mel time =   230.60 ms
-whisper_print_timings:   sample time =    73.19 ms
-whisper_print_timings:   encode time = 19552.61 ms / 814.69 ms per layer
-whisper_print_timings:   decode time = 13249.96 ms / 552.08 ms per layer
-whisper_print_timings:    total time = 33686.27 ms
+whisper_model_load: model ctx     = 1462.35 MB
+whisper_model_load: model size    = 1462.12 MB
+
+system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
+
+main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
+
+
+[00:00:00.000 --> 00:00:08.000]   My fellow Americans, this day has brought terrible news and great sadness to our country.
+[00:00:08.000 --> 00:00:17.000]   At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
+[00:00:17.000 --> 00:00:23.000]   A short time later, debris was seen falling from the skies above Texas.
+[00:00:23.000 --> 00:00:29.000]   The Columbia's lost. There are no survivors.
+[00:00:29.000 --> 00:00:32.000]   On board was a crew of seven.
+[00:00:32.000 --> 00:00:39.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
+[00:00:39.000 --> 00:00:48.000]   Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
+[00:00:48.000 --> 00:00:52.000]   a colonel in the Israeli Air Force.
+[00:00:52.000 --> 00:00:58.000]   These men and women assumed great risk in the service to all humanity.
+[00:00:58.000 --> 00:01:03.000]   In an age when space flight has come to seem almost routine,
+[00:01:03.000 --> 00:01:07.000]   it is easy to overlook the dangers of travel by rocket
+[00:01:07.000 --> 00:01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
+[00:01:12.000 --> 00:01:18.000]   These astronauts knew the dangers, and they faced them willingly,
+[00:01:18.000 --> 00:01:23.000]   knowing they had a high and noble purpose in life.
+[00:01:23.000 --> 00:01:31.000]   Because of their courage and daring and idealism, we will miss them all the more.
+[00:01:31.000 --> 00:01:36.000]   All Americans today are thinking as well of the families of these men and women
+[00:01:36.000 --> 00:01:40.000]   who have been given this sudden shock and grief.
+[00:01:40.000 --> 00:01:45.000]   You're not alone. Our entire nation grieves with you,
+[00:01:45.000 --> 00:01:52.000]   and those you love will always have the respect and gratitude of this country.
+[00:01:52.000 --> 00:01:56.000]   The cause in which they died will continue.
+[00:01:56.000 --> 00:02:04.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery
+[00:02:04.000 --> 00:02:11.000]   and the longing to understand. Our journey into space will go on.
+[00:02:11.000 --> 00:02:16.000]   In the skies today, we saw destruction and tragedy.
+[00:02:16.000 --> 00:02:22.000]   Yet farther than we can see, there is comfort and hope.
+[00:02:22.000 --> 00:02:29.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
+[00:02:29.000 --> 00:02:35.000]   who created all these. He who brings out the starry hosts one by one
+[00:02:35.000 --> 00:02:39.000]   and calls them each by name."
+[00:02:39.000 --> 00:02:46.000]   Because of His great power and mighty strength, not one of them is missing.
+[00:02:46.000 --> 00:02:55.000]   The same Creator who names the stars also knows the names of the seven souls we mourn today.
+[00:02:55.000 --> 00:03:01.000]   The crew of the shuttle Columbia did not return safely to earth,
+[00:03:01.000 --> 00:03:05.000]   yet we can pray that all are safely home.
+[00:03:05.000 --> 00:03:13.000]   May God bless the grieving families, and may God continue to bless America.
+[00:03:13.000 --> 00:03:19.000]   [Silence]
+
+
+whisper_print_timings:     fallbacks =   1 p /   0 h
+whisper_print_timings:     load time =   569.03 ms
+whisper_print_timings:      mel time =   146.85 ms
+whisper_print_timings:   sample time =   238.66 ms /   553 runs (    0.43 ms per run)
+whisper_print_timings:   encode time = 18665.10 ms /     9 runs ( 2073.90 ms per run)
+whisper_print_timings:   decode time = 13090.93 ms /   549 runs (   23.85 ms per run)
+whisper_print_timings:    total time = 32733.52 ms
 ```
 </details>

@ -321,14 +332,14 @@ to highlight words with high or low confidence:

 ## Controlling the length of the generated text segments (experimental)

-For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: 
+For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:

 ```java
 ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16

 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
+system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |

 main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

@ -352,7 +363,7 @@ The `--max-len` argument can be used to obtain word-level timestamps. Simply use

 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
+system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |

 main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

--- a/bindings/go/examples/go-whisper/process.go
+++ b/bindings/go/examples/go-whisper/process.go
@ -25,6 +25,8 @@ func Process(model whisper.Model, path string, flags *Flags) error {
 		return err
 	}

+	fmt.Printf("\n%s\n", context.SystemInfo())
+
 	// Open the file
 	fmt.Fprintf(flags.Output(), "Loading %q\n", path)
 	fh, err := os.Open(path)
@ -64,10 +66,13 @@ func Process(model whisper.Model, path string, flags *Flags) error {

 	// Process the data
 	fmt.Fprintf(flags.Output(), "  ...processing %q\n", path)
+	context.ResetTimings()
 	if err := context.Process(data, cb); err != nil {
 		return err
 	}

+	context.PrintTimings()
+
 	// Print out the results
 	switch {
 	case flags.GetOut() == "srt":
--- a/bindings/go/params.go
+++ b/bindings/go/params.go
@ -49,6 +49,10 @@ func (p *Params) SetSpeedup(v bool) {

 // Set language id
 func (p *Params) SetLanguage(lang int) error {
+	if lang == -1 {
+		p.language = nil
+		return nil
+	}
 	str := C.whisper_lang_str(C.int(lang))
 	if str == nil {
 		return ErrInvalidLanguage
@ -66,6 +70,11 @@ func (p *Params) Language() int {
 	return int(C.whisper_lang_id(p.language))
 }

+// Get number of threads to use
+func (p *Params) Threads() int {
+	return int(p.n_threads)
+}
+
 // Set number of threads to use
 func (p *Params) SetThreads(threads int) {
 	p.n_threads = C.int(threads)
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@ -1,7 +1,9 @@
 package whisper

 import (
+	"fmt"
 	"io"
+	"runtime"
 	"strings"
 	"time"

@ -44,7 +46,10 @@ func (context *context) SetLanguage(lang string) error {
 	if !context.model.IsMultilingual() {
 		return ErrModelNotMultilingual
 	}
-	if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
+
+	if lang == "auto" {
+		context.params.SetLanguage(-1)
+	} else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
 		return ErrUnsupportedLanguage
 	} else if err := context.params.SetLanguage(id); err != nil {
 		return err
@ -59,6 +64,10 @@ func (context *context) IsMultilingual() bool {

 // Get language
 func (context *context) Language() string {
+	id := context.params.Language()
+	if id == -1 {
+		return "auto"
+	}
 	return whisper.Whisper_lang_str(context.params.Language())
 }

@ -107,6 +116,36 @@ func (context *context) SetMaxTokensPerSegment(n uint) {
 	context.params.SetMaxTokensPerSegment(int(n))
 }

+// ResetTimings resets the model timings. Should be called before processing.
+func (context *context) ResetTimings() {
+	context.model.ctx.Whisper_reset_timings()
+}
+
+// PrintTimings prints the model timings to stdout.
+func (context *context) PrintTimings() {
+	context.model.ctx.Whisper_print_timings()
+}
+
+// SystemInfo returns the system information
+func (context *context) SystemInfo() string {
+	return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
+		context.params.Threads(),
+		runtime.NumCPU(),
+		whisper.Whisper_print_system_info(),
+	)
+}
+
+// WhisperLangAutoDetect uses mel data at offset_ms to try to auto-detect the spoken language.
+// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
+// Returns the probabilities of all languages.
+func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
+	langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
+	if err != nil {
+		return nil, err
+	}
+	return langProbs, nil
+}
+
 // Process new sample data and return any errors
 func (context *context) Process(data []float32, cb SegmentCallback) error {
 	if context.model.ctx == nil {
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@ -29,7 +29,7 @@ type Model interface {

 // Context is the speech recognition context.
 type Context interface {
-	SetLanguage(string) error // Set the language to use for speech recognition.
+	SetLanguage(string) error // Set the language to use for speech recognition; use "auto" to auto-detect the language.
 	SetTranslate(bool)        // Set translate flag
 	IsMultilingual() bool     // Return true if the model is multilingual.
 	Language() string         // Get language
@ -60,6 +60,12 @@ type Context interface {
 	IsNOT(Token) bool          // Test for "No timestamps" token
 	IsLANG(Token, string) bool // Test for token associated with a specific language
 	IsText(Token) bool         // Test for text token
+
+	// Timings
+	PrintTimings()
+	ResetTimings()
+
+	SystemInfo() string
 }

 // Segment is the text result of a speech recognition.
--- a/bindings/ios
+++ b/bindings/ios
@ -1 +1 @@
-Subproject commit 9653b42eb4d6d7ef08f736e20f05c4d24492407b
+Subproject commit d5c6d5c8a39703153472055c13902defc7177d22
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.1.1",
+  "version": "1.2.0",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/examples/helpers.js
+++ b/examples/helpers.js
@ -8,7 +8,7 @@ function convertTypedArray(src, type) {

 var printTextarea = (function() {
    var element = document.getElementById('output');
-    if (element) element.alue = ''; // clear browser cache
+    if (element) element.value = ''; // clear browser cache
    return function(text) {
        if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
        console.log(text);
@ -88,11 +88,15 @@ async function fetchRemote(url, cbProgress, cbPrint) {
 // - check if the data is already in the IndexedDB
 // - if not, fetch it from the remote URL and store it in the IndexedDB
 function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
-    // query the storage quota and print it
-    navigator.storage.estimate().then(function (estimate) {
-        cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes');
-        cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes');
-    });
+    if (!navigator.storage || !navigator.storage.estimate) {
+        cbPrint('loadRemote: navigator.storage.estimate() is not supported');
+    } else {
+        // query the storage quota and print it
+        navigator.storage.estimate().then(function (estimate) {
+            cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes');
+            cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes');
+        });
+    }

    // check if the data is already in the IndexedDB
    var rq = indexedDB.open(dbName, dbVersion);
--- a/examples/livestream.sh
+++ b/examples/livestream.sh
@ -100,7 +100,7 @@ while [ $running -eq 1 ]; do
        err=$(cat /tmp/whisper-live.err | wc -l)
    done

-    ./main -t 8 -m ./models/ggml-base.en.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
+    ./main -t 8 -m ./models/ggml-${model}.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1

    while [ $SECONDS -lt $((($i+1)*$step_s)) ]; do
        sleep 1
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -9,25 +9,35 @@ It can be used as a reference for using the `whisper.cpp` library in other proje
 usage: ./main [options] file0.wav file1.wav ...

 options:
-  -h,       --help          [default] show this help message and exit
-  -t N,     --threads N     [4      ] number of threads to use during computation
-  -p N,     --processors N  [1      ] number of processors to use during computation
-  -ot N,    --offset-t N    [0      ] time offset in milliseconds
-  -on N,    --offset-n N    [0      ] segment index offset
-  -d  N,    --duration N    [0      ] duration of audio to process in milliseconds
-  -mc N,    --max-context N [-1     ] maximum number of text context tokens to store
-  -ml N,    --max-len N     [0      ] maximum segment length in characters
-  -wt N,    --word-thold N  [0.01   ] word timestamp probability threshold
-  -su,      --speed-up      [false  ] speed up audio by x2 (reduced accuracy)
-  -tr,      --translate     [false  ] translate from source language to english
-  -otxt,    --output-txt    [false  ] output result in a text file
-  -ovtt,    --output-vtt    [false  ] output result in a vtt file
-  -osrt,    --output-srt    [false  ] output result in a srt file
-  -owts,    --output-words  [false  ] output script for generating karaoke video
-  -ps,      --print-special [false  ] print special tokens
-  -pc,      --print-colors  [false  ] print colors
-  -nt,      --no-timestamps [true   ] do not print timestamps
-  -l LANG,  --language LANG [en     ] spoken language
-  -m FNAME, --model FNAME   [models/ggml-base.en.bin] model path
-  -f FNAME, --file FNAME    [       ] input WAV file path
+  -h,        --help              [default] show this help message and exit
+  -t N,      --threads N         [4      ] number of threads to use during computation
+  -p N,      --processors N      [1      ] number of processors to use during computation
+  -ot N,     --offset-t N        [0      ] time offset in milliseconds
+  -on N,     --offset-n N        [0      ] segment index offset
+  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
+  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
+  -ml N,     --max-len N         [0      ] maximum segment length in characters
+  -bo N,     --best-of N         [5      ] number of best candidates to keep
+  -bs N,     --beam-size N       [-1     ] beam size for beam search
+  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
+  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
+  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
+  -su,       --speed-up          [false  ] speed up audio by x2 (reduced accuracy)
+  -tr,       --translate         [false  ] translate from source language to english
+  -di,       --diarize           [false  ] stereo audio diarization
+  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
+  -otxt,     --output-txt        [false  ] output result in a text file
+  -ovtt,     --output-vtt        [false  ] output result in a vtt file
+  -osrt,     --output-srt        [false  ] output result in a srt file
+  -owts,     --output-words      [false  ] output script for generating karaoke video
+  -ocsv,     --output-csv        [false  ] output result in a CSV file
+  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
+  -ps,       --print-special     [false  ] print special tokens
+  -pc,       --print-colors      [false  ] print colors
+  -pp,       --print-progress    [false  ] print progress
+  -nt,       --no-timestamps     [true   ] do not print timestamps
+  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
+             --prompt PROMPT     [       ] initial prompt
+  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
+  -f FNAME,  --file FNAME        [       ] input WAV file path
 ```
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -53,22 +53,23 @@ void replace_all(std::string & s, const std::string & search, const std::string
 // command-line parameters
 struct whisper_params {
    int32_t n_threads    = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_processors = 1;
-    int32_t offset_t_ms  = 0;
-    int32_t offset_n     = 0;
-    int32_t duration_ms  = 0;
+    int32_t n_processors =  1;
+    int32_t offset_t_ms  =  0;
+    int32_t offset_n     =  0;
+    int32_t duration_ms  =  0;
    int32_t max_context  = -1;
-    int32_t max_len      = 0;
-    int32_t best_of      = 5;
+    int32_t max_len      =  0;
+    int32_t best_of      =  5;
    int32_t beam_size    = -1;

-    float word_thold    = 0.01f;
-    float entropy_thold = 2.4f;
-    float logprob_thold = -1.0f;
+    float word_thold    =  0.01f;
+    float entropy_thold =  2.40f;
+    float logprob_thold = -1.00f;

    bool speed_up       = false;
    bool translate      = false;
    bool diarize        = false;
+    bool no_fallback    = false;
    bool output_txt     = false;
    bool output_vtt     = false;
    bool output_srt     = false;
@ -117,6 +118,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-su"   || arg == "--speed-up")       { params.speed_up       = true; }
        else if (arg == "-tr"   || arg == "--translate")      { params.translate      = true; }
        else if (arg == "-di"   || arg == "--diarize")        { params.diarize        = true; }
+        else if (arg == "-nf"   || arg == "--no-fallback")    { params.no_fallback    = true; }
        else if (arg == "-otxt" || arg == "--output-txt")     { params.output_txt     = true; }
        else if (arg == "-ovtt" || arg == "--output-vtt")     { params.output_vtt     = true; }
        else if (arg == "-osrt" || arg == "--output-srt")     { params.output_srt     = true; }
@ -162,6 +164,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -su,       --speed-up          [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
    fprintf(stderr, "  -tr,       --translate         [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
    fprintf(stderr, "  -di,       --diarize           [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
+    fprintf(stderr, "  -nf,       --no-fallback       [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
    fprintf(stderr, "  -otxt,     --output-txt        [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
    fprintf(stderr, "  -ovtt,     --output-vtt        [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
    fprintf(stderr, "  -osrt,     --output-srt        [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
@ -346,9 +349,6 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);
-        if (text[0] == ' ') {
-            text = text + sizeof(char); //whisper_full_get_segment_text() returns a string with leading space, point to the next character.
-        }
        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);

@ -517,7 +517,7 @@ int main(int argc, char ** argv) {

    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
        const auto fname_inp = params.fname_inp[f];
-		const auto fname_outp = f < params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];
+		const auto fname_outp = f < (int) params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];

        std::vector<float> pcmf32; // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@ -650,17 +650,19 @@ int main(int argc, char ** argv) {

            wparams.token_timestamps = params.output_wts || params.max_len > 0;
            wparams.thold_pt         = params.word_thold;
-            wparams.entropy_thold    = params.entropy_thold;
-            wparams.logprob_thold    = params.logprob_thold;
            wparams.max_len          = params.output_wts && params.max_len == 0 ? 60 : params.max_len;

            wparams.speed_up         = params.speed_up;

+            wparams.prompt_tokens     = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
+            wparams.prompt_n_tokens   = prompt_tokens.empty() ? 0       : prompt_tokens.size();
+
            wparams.greedy.best_of        = params.best_of;
            wparams.beam_search.beam_size = params.beam_size;

-            wparams.prompt_tokens     = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
-            wparams.prompt_n_tokens   = prompt_tokens.empty() ? 0       : prompt_tokens.size();
+            wparams.temperature_inc  = params.no_fallback ? 0.0f : wparams.temperature_inc;
+            wparams.entropy_thold    = params.entropy_thold;
+            wparams.logprob_thold    = params.logprob_thold;

            whisper_print_user_data user_data = { &params, &pcmf32s };

--- a/ggml.c
+++ b/ggml.c
@ -1258,7 +1258,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 //

 struct ggml_object {
-    size_t offset;
+    size_t offs;
    size_t size;

    struct ggml_object * next;
@ -1284,6 +1284,9 @@ struct ggml_context {

    struct ggml_object * objects_begin;
    struct ggml_object * objects_end;
+
+    struct ggml_scratch scratch;
+    struct ggml_scratch scratch_save;
 };

 struct ggml_context_container {
@ -1346,7 +1349,7 @@ inline static void ggml_critical_section_end(void) {

 void ggml_print_object(const struct ggml_object * obj) {
    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offset, obj->size, (const void *) obj->next);
+            obj->offs, obj->size, (const void *) obj->next);
 }

 void ggml_print_objects(const struct ggml_context * ctx) {
@ -1542,12 +1545,14 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
    }

    *ctx = (struct ggml_context) {
-        .mem_size         = params.mem_size,
-        .mem_buffer       = params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
-        .mem_buffer_owned = params.mem_buffer ? false : true,
-        .n_objects        = 0,
-        .objects_begin    = NULL,
-        .objects_end      = NULL,
+        /*.mem_size         =*/ params.mem_size,
+        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
+        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
+        /*.n_objects        =*/ 0,
+        /*.objects_begin    =*/ NULL,
+        /*.objects_end      =*/ NULL,
+        /*.scratch          =*/ { 0, 0, NULL, },
+        /*.scratch_save     =*/ { 0, 0, NULL, },
    };

    ggml_assert_aligned(ctx->mem_buffer);
@ -1570,7 +1575,7 @@ void ggml_free(struct ggml_context * ctx) {
            g_state.contexts[i].used = false;

            GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offset + ctx->objects_end->size);
+                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);

            if (ctx->mem_buffer_owned) {
                free(ctx->mem_buffer);
@ -1589,7 +1594,15 @@ void ggml_free(struct ggml_context * ctx) {
 }

 size_t ggml_used_mem(const struct ggml_context * ctx) {
-    return ctx->objects_end->offset + ctx->objects_end->size;
+    return ctx->objects_end->offs + ctx->objects_end->size;
+}
+
+size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
+    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
+
+    ctx->scratch = scratch;
+
+    return result;
 }

 ////////////////////////////////////////////////////////////////////////////////
@ -1603,9 +1616,9 @@ struct ggml_tensor * ggml_new_tensor_impl(
    // always insert objects at the end of the context's memory pool
    struct ggml_object * obj_cur = ctx->objects_end;

-    const size_t cur_offset = obj_cur == NULL ? 0 : obj_cur->offset;
-    const size_t cur_size   = obj_cur == NULL ? 0 : obj_cur->size;
-    const size_t cur_end    = cur_offset + cur_size;
+    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
+    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
+    const size_t cur_end  = cur_offs + cur_size;

    size_t size_needed = 0;

@ -1616,25 +1629,52 @@ struct ggml_tensor * ggml_new_tensor_impl(
        }
        // align to GGML_MEM_ALIGN
        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-
-    }
-    size_needed += sizeof(struct ggml_tensor);
-
-    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-        GGML_PRINT("%s: not enough space in the context's memory pool\n", __func__);
-        assert(false);
-        return NULL;
    }

    char * const mem_buffer = ctx->mem_buffer;
-
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);

-    *obj_new = (struct ggml_object) {
-        .offset = cur_end + GGML_OBJECT_SIZE,
-        .size   = size_needed,
-        .next   = NULL,
-    };
+    if (ctx->scratch.data == NULL || data != NULL) {
+        size_needed += sizeof(struct ggml_tensor);
+
+        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
+            assert(false);
+            return NULL;
+        }
+
+        *obj_new = (struct ggml_object) {
+            .offs = cur_end + GGML_OBJECT_SIZE,
+            .size = size_needed,
+            .next = NULL,
+        };
+    } else {
+        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
+            assert(false);
+            return NULL;
+        }
+
+        if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
+            assert(false);
+            return NULL;
+        }
+
+        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+        *obj_new = (struct ggml_object) {
+            .offs = cur_end + GGML_OBJECT_SIZE,
+            .size = sizeof(struct ggml_tensor),
+            .next = NULL,
+        };
+
+        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
+
+        ctx->scratch.offs += size_needed;
+    }

    if (obj_cur != NULL) {
        obj_cur->next = obj_new;
@ -1645,9 +1685,9 @@ struct ggml_tensor * ggml_new_tensor_impl(

    ctx->objects_end = obj_new;

-    //GGML_PRINT_DEBUG("%s: inserted new object at %zu\n", __func__, cur_end);
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);

-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offset);
+    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);

    ggml_assert_aligned(result);

@ -1690,7 +1730,7 @@ struct ggml_tensor * ggml_new_tensor(
        struct ggml_context * ctx,
        enum   ggml_type type,
        int    n_dims,
-        const int* ne) {
+        const int * ne) {
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }

@ -1732,16 +1772,26 @@ struct ggml_tensor * ggml_new_tensor_4d(
 }

 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);

+    ctx->scratch = ctx->scratch_save;
+
    ggml_set_i32(result, value);

    return result;
 }

 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);

+    ctx->scratch = ctx->scratch_save;
+
    ggml_set_f32(result, value);

    return result;
@ -2350,7 +2400,7 @@ struct ggml_tensor * ggml_repeat(
    result->op   = GGML_OP_REPEAT;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
    result->src0 = a;
-    result->src1 = NULL;
+    result->src1 = b;

    return result;
 }
@ -2966,9 +3016,7 @@ struct ggml_tensor * ggml_diag_mask_inf(
    // TODO: when implement backward, fix this:
    //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
-    ((int32_t *) b->data)[0] = n_past;
+    struct ggml_tensor * b = ggml_new_i32(ctx, n_past);

    result->op   = GGML_OP_DIAG_MASK_INF;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -4300,7 +4348,9 @@ static bool ggml_compute_forward_mul_mat_use_blas(
    const int ne1 = dst->ne[1];

    // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ne0 >= 32 && ne1 >= 32 && ne10 >= 32) {
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && (
+             (ne0 >= 32 && ne1  >= 32   && ne10 >= 32)
+            )) {
        //printf("BLAS: %d %d %d\n", ne0, ne1, ne10);
        return true;
    }
@ -7289,6 +7339,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                                    node->n_tasks = 1; // TODO: this actually is doing nothing
                                                       //       the threads are still spinning
                                    cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
+                                    //printf("src0: ne0 = %d, ne1 = %d, ne = %d\n", node->src0->ne[0], node->src0->ne[1], node->src0->ne[0]*node->src0->ne[1]);
+                                    //printf("src1: ne0 = %d, ne1 = %d, ne = %d\n", node->src1->ne[0], node->src1->ne[1], node->src1->ne[0]*node->src1->ne[1]);
+                                    //printf("cur = %zu\n", cur);
                                } else {
                                    cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
                                }
--- a/ggml.h
+++ b/ggml.h
@ -301,6 +301,13 @@ struct ggml_cgraph {
    int64_t perf_time_us;
 };

+// scratch buffer
+struct ggml_scratch {
+    size_t offs;
+    size_t size;
+    void * data;
+};
+
 struct ggml_init_params {
    // memory pool
    size_t mem_size;   // bytes
@ -327,6 +334,8 @@ void ggml_free(struct ggml_context * ctx);

 size_t ggml_used_mem(const struct ggml_context * ctx);

+size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+
 struct ggml_tensor * ggml_new_tensor(
        struct ggml_context * ctx,
        enum   ggml_type type,
--- a/whisper.cpp
+++ b/whisper.cpp