Merge branch 'ggerganov:master' into master

pull/444/head
Alex Bacart 2 years ago committed by GitHub
commit 7d9583cd5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -3,11 +3,16 @@ on:
push: push:
paths: paths:
- bindings/go/** - bindings/go/**
- whisper.h
pull_request:
paths:
- bindings/go/**
- whisper.h
jobs: jobs:
ubuntu-latest: ubuntu-latest:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/setup-go@v3 - uses: actions/setup-go@v3
with: with:
go-version: '^1.19' go-version: '^1.19'

@ -1,267 +1,267 @@
name: CI name: CI
on: [push] on: [push, pull_request]
jobs: jobs:
ubuntu-latest: ubuntu-latest:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential sudo apt-get install build-essential
sudo apt-get install libsdl2-dev sudo apt-get install libsdl2-dev
- name: Build - name: Build
run: | run: |
make make
make stream make stream
macOS-latest: macOS-latest:
runs-on: macOS-latest runs-on: macOS-latest
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
brew update brew update
brew install sdl2 brew install sdl2
- name: Build - name: Build
run: | run: |
make make
make stream make stream
ubuntu-latest-gcc: ubuntu-latest-gcc:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
build: [Debug, Release] build: [Debug, Release]
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential sudo apt-get install build-essential
sudo apt-get install cmake sudo apt-get install cmake
sudo apt-get install libsdl2-dev sudo apt-get install libsdl2-dev
- name: Configure - name: Configure
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- name: Build - name: Build
run: | run: |
make make
ctest -L gh --output-on-failure ctest -L gh --output-on-failure
ubuntu-latest-clang: ubuntu-latest-clang:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
build: [Debug, Release] build: [Debug, Release]
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential sudo apt-get install build-essential
sudo apt-get install cmake sudo apt-get install cmake
sudo apt-get install libsdl2-dev sudo apt-get install libsdl2-dev
- name: Configure - name: Configure
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
- name: Build - name: Build
run: | run: |
make make
ctest -L gh --output-on-failure ctest -L gh --output-on-failure
ubuntu-latest-gcc-sanitized: ubuntu-latest-gcc-sanitized:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED] sanitizer: [ADDRESS, THREAD, UNDEFINED]
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential sudo apt-get install build-essential
sudo apt-get install cmake sudo apt-get install cmake
- name: Configure - name: Configure
run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
- name: Build - name: Build
run: | run: |
make make
ctest -L gh --output-on-failure ctest -L gh --output-on-failure
windows: windows:
runs-on: windows-latest runs-on: windows-latest
strategy: strategy:
matrix: matrix:
build: [Release] build: [Release]
arch: [Win32, x64] arch: [Win32, x64]
sdl2: [ON] sdl2: [ON]
include: include:
- arch: Win32 - arch: Win32
s2arc: x86 s2arc: x86
- arch: x64 - arch: x64
s2arc: x64 s2arc: x64
- sdl2: ON - sdl2: ON
s2ver: 2.26.0 s2ver: 2.26.0
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Add msbuild to PATH - name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1 uses: microsoft/setup-msbuild@v1
- name: Fetch SDL2 and set SDL2_DIR - name: Fetch SDL2 and set SDL2_DIR
if: matrix.sdl2 == 'ON' if: matrix.sdl2 == 'ON'
run: | run: |
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
7z x sdl2.zip 7z x sdl2.zip
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
- name: Configure - name: Configure
run: > run: >
cmake -S . -B ./build -A ${{ matrix.arch }} cmake -S . -B ./build -A ${{ matrix.arch }}
-DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }} -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
- name: Build - name: Build
run: | run: |
cd ./build cd ./build
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
- name: Copy SDL2.dll - name: Copy SDL2.dll
if: matrix.sdl2 == 'ON' if: matrix.sdl2 == 'ON'
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
- name: Upload binaries - name: Upload binaries
if: matrix.sdl2 == 'ON' if: matrix.sdl2 == 'ON'
uses: actions/upload-artifact@v1 uses: actions/upload-artifact@v1
with: with:
name: whisper-bin-${{ matrix.arch }} name: whisper-bin-${{ matrix.arch }}
path: build/bin/${{ matrix.build }} path: build/bin/${{ matrix.build }}
windows-blas: windows-blas:
runs-on: windows-latest runs-on: windows-latest
strategy: strategy:
matrix: matrix:
build: [Release] build: [Release]
arch: [Win32, x64] arch: [Win32, x64]
blas: [ON] blas: [ON]
sdl2: [ON] sdl2: [ON]
include: include:
- arch: Win32 - arch: Win32
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
s2arc: x86 s2arc: x86
- arch: x64 - arch: x64
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
s2arc: x64 s2arc: x64
- sdl2: ON - sdl2: ON
s2ver: 2.26.0 s2ver: 2.26.0
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Add msbuild to PATH - name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1 uses: microsoft/setup-msbuild@v1
- name: Fetch OpenBLAS - name: Fetch OpenBLAS
if: matrix.blas == 'ON' if: matrix.blas == 'ON'
run: | run: |
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }} C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
7z x blas.zip -oblas -y 7z x blas.zip -oblas -y
copy blas/include/cblas.h . copy blas/include/cblas.h .
copy blas/include/openblas_config.h . copy blas/include/openblas_config.h .
echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
- name: Fetch SDL2 and set SDL2_DIR - name: Fetch SDL2 and set SDL2_DIR
if: matrix.sdl2 == 'ON' if: matrix.sdl2 == 'ON'
run: | run: |
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
7z x sdl2.zip 7z x sdl2.zip
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
- name: Configure - name: Configure
run: > run: >
cmake -S . -B ./build -A ${{ matrix.arch }} cmake -S . -B ./build -A ${{ matrix.arch }}
-DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }} -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
-DCMAKE_LIBRARY_PATH="$env:blasdir/lib" -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }} -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
- name: Build - name: Build
run: | run: |
cd ./build cd ./build
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
- name: Copy libopenblas.dll - name: Copy libopenblas.dll
if: matrix.blas == 'ON' if: matrix.blas == 'ON'
run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }} run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
- name: Copy SDL2.dll - name: Copy SDL2.dll
if: matrix.sdl2 == 'ON' if: matrix.sdl2 == 'ON'
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
- name: Upload binaries - name: Upload binaries
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
uses: actions/upload-artifact@v1 uses: actions/upload-artifact@v1
with: with:
name: whisper-blas-bin-${{ matrix.arch }} name: whisper-blas-bin-${{ matrix.arch }}
path: build/bin/${{ matrix.build }} path: build/bin/${{ matrix.build }}
emscripten: emscripten:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
build: [Release] build: [Release]
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v1 uses: actions/checkout@v1
- name: Dependencies - name: Dependencies
run: | run: |
wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
tar -xvf master.tar.gz tar -xvf master.tar.gz
emsdk-master/emsdk update emsdk-master/emsdk update
emsdk-master/emsdk install latest emsdk-master/emsdk install latest
emsdk-master/emsdk activate latest emsdk-master/emsdk activate latest
- name: Configure - name: Configure
run: echo "tmp" run: echo "tmp"
- name: Build - name: Build
run: | run: |
pushd emsdk-master pushd emsdk-master
source ./emsdk_env.sh source ./emsdk_env.sh
popd popd
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
make make

1
.gitignore vendored

@ -1,4 +1,5 @@
*.o *.o
*.a
.cache/ .cache/
.vs/ .vs/
.vscode/ .vscode/

@ -64,10 +64,13 @@ func Process(model whisper.Model, path string, flags *Flags) error {
// Process the data // Process the data
fmt.Fprintf(flags.Output(), " ...processing %q\n", path) fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
context.ResetTimings()
if err := context.Process(data, cb); err != nil { if err := context.Process(data, cb); err != nil {
return err return err
} }
context.PrintTimings()
// Print out the results // Print out the results
switch { switch {
case flags.GetOut() == "srt": case flags.GetOut() == "srt":

@ -107,6 +107,16 @@ func (context *context) SetMaxTokensPerSegment(n uint) {
context.params.SetMaxTokensPerSegment(int(n)) context.params.SetMaxTokensPerSegment(int(n))
} }
// ResetTimings resets the model timings. Should be called before processing.
//
// It is a no-op when the model has no underlying whisper context, matching
// the nil check that Process performs on context.model.ctx before using it.
func (context *context) ResetTimings() {
	// Guard against a missing whisper context so the call below never
	// receives a nil context pointer.
	if context.model.ctx == nil {
		return
	}
	context.model.ctx.Whisper_reset_timings()
}
// PrintTimings prints the model timings gathered by the underlying whisper
// context.
//
// It is a no-op when the model has no underlying whisper context, matching
// the nil check that Process performs on context.model.ctx before using it.
//
// NOTE(review): the previous comment said the output goes to stdout; the
// destination is decided by Whisper_print_timings and appears to be stderr
// in whisper.cpp — confirm before relying on either stream.
func (context *context) PrintTimings() {
	// Guard against a missing whisper context so the call below never
	// receives a nil context pointer.
	if context.model.ctx == nil {
		return
	}
	context.model.ctx.Whisper_print_timings()
}
// Process new sample data and return any errors // Process new sample data and return any errors
func (context *context) Process(data []float32, cb SegmentCallback) error { func (context *context) Process(data []float32, cb SegmentCallback) error {
if context.model.ctx == nil { if context.model.ctx == nil {

@ -60,6 +60,9 @@ type Context interface {
IsNOT(Token) bool // Test for "No timestamps" token IsNOT(Token) bool // Test for "No timestamps" token
IsLANG(Token, string) bool // Test for token associated with a specific language IsLANG(Token, string) bool // Test for token associated with a specific language
IsText(Token) bool // Test for text token IsText(Token) bool // Test for text token
PrintTimings()
ResetTimings()
} }
// Segment is the text result of a speech recognition. // Segment is the text result of a speech recognition.

File diff suppressed because one or more lines are too long

@ -88,11 +88,15 @@ async function fetchRemote(url, cbProgress, cbPrint) {
// - check if the data is already in the IndexedDB // - check if the data is already in the IndexedDB
// - if not, fetch it from the remote URL and store it in the IndexedDB // - if not, fetch it from the remote URL and store it in the IndexedDB
function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) { function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
// query the storage quota and print it if (!navigator.storage || !navigator.storage.estimate) {
navigator.storage.estimate().then(function (estimate) { cbPrint('loadRemote: navigator.storage.estimate() is not supported');
cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes'); } else {
cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes'); // query the storage quota and print it
}); navigator.storage.estimate().then(function (estimate) {
cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes');
cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes');
});
}
// check if the data is already in the IndexedDB // check if the data is already in the IndexedDB
var rq = indexedDB.open(dbName, dbVersion); var rq = indexedDB.open(dbName, dbVersion);

@ -3524,7 +3524,7 @@ int whisper_full(
prompt.clear(); prompt.clear();
// if we have already generated some text, use it as a prompt to condition the next generation // if we have already generated some text, use it as a prompt to condition the next generation
if (!prompt_past.empty() && t_cur < 0.5f) { if (!prompt_past.empty() && t_cur < 0.5f && params.n_max_text_ctx > 0) {
int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size())); int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));
prompt = { whisper_token_prev(ctx) }; prompt = { whisper_token_prev(ctx) };
@ -3535,11 +3535,11 @@ int whisper_full(
prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end()); prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end());
// print the prompt // print the prompt
//WHISPER_PRINT_DEBUG("\n\n"); WHISPER_PRINT_DEBUG("\n\n");
//for (int i = 0; i < (int) prompt.size(); i++) { for (int i = 0; i < (int) prompt.size(); i++) {
// WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str()); WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
//} }
//WHISPER_PRINT_DEBUG("\n\n"); WHISPER_PRINT_DEBUG("\n\n");
if (!whisper_decode(*ctx, ctx->decoders[0], prompt.data(), prompt.size(), 0, params.n_threads)) { if (!whisper_decode(*ctx, ctx->decoders[0], prompt.data(), prompt.size(), 0, params.n_threads)) {
fprintf(stderr, "%s: failed to decode\n", __func__); fprintf(stderr, "%s: failed to decode\n", __func__);

Loading…
Cancel
Save