Merge branch 'ggerganov:master' into master

2 years ago · 7d9583cd5b
parent ad08bc0188 b992f3709e
commit 7d9583cd5b
9 changed files with 305 additions and 279 deletions
--- a/.github/workflows/bindings.yml
+++ b/.github/workflows/bindings.yml
@@ -3,6 +3,11 @@ on:
  push:
    paths:
      - bindings/go/**
+      - whisper.h
+  pull_request:
+    paths:
+      - bindings/go/**
+      - whisper.h

 jobs:
  ubuntu-latest:
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,5 +1,5 @@
 name: CI
-on: [push]
+on: [push, pull_request]

 jobs:
  ubuntu-latest:
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.o
+*.a
 .cache/
 .vs/
 .vscode/
--- a/bindings/go/examples/go-whisper/process.go
+++ b/bindings/go/examples/go-whisper/process.go
@@ -64,10 +64,13 @@ func Process(model whisper.Model, path string, flags *Flags) error {

 	// Process the data
 	fmt.Fprintf(flags.Output(), "  ...processing %q\n", path)
+	context.ResetTimings()
 	if err := context.Process(data, cb); err != nil {
 		return err
 	}

+	context.PrintTimings()
+
 	// Print out the results
 	switch {
 	case flags.GetOut() == "srt":
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@@ -107,6 +107,16 @@ func (context *context) SetMaxTokensPerSegment(n uint) {
 	context.params.SetMaxTokensPerSegment(int(n))
 }

+// ResetTimings resets the model timings. It should be called before processing.
+func (context *context) ResetTimings() {
+	context.model.ctx.Whisper_reset_timings()
+}
+
+// PrintTimings prints the model timings to stderr.
+func (context *context) PrintTimings() {
+	context.model.ctx.Whisper_print_timings()
+}
+
 // Process new sample data and return any errors
 func (context *context) Process(data []float32, cb SegmentCallback) error {
 	if context.model.ctx == nil {
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@@ -60,6 +60,9 @@ type Context interface {
 	IsNOT(Token) bool          // Test for "No timestamps" token
 	IsLANG(Token, string) bool // Test for token associated with a specific language
 	IsText(Token) bool         // Test for text token
+
+	PrintTimings()
+	ResetTimings()
 }

 // Segment is the text result of a speech recognition.
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/examples/helpers.js
+++ b/examples/helpers.js
@@ -88,11 +88,15 @@ async function fetchRemote(url, cbProgress, cbPrint) {
 // - check if the data is already in the IndexedDB
 // - if not, fetch it from the remote URL and store it in the IndexedDB
 function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
+    if (!navigator.storage || !navigator.storage.estimate) {
+        cbPrint('loadRemote: navigator.storage.estimate() is not supported');
+    } else {
        // query the storage quota and print it
        navigator.storage.estimate().then(function (estimate) {
            cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes');
            cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes');
        });
+    }

    // check if the data is already in the IndexedDB
    var rq = indexedDB.open(dbName, dbVersion);
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -3524,7 +3524,7 @@ int whisper_full(
                prompt.clear();

                // if we have already generated some text, use it as a prompt to condition the next generation
-                if (!prompt_past.empty() && t_cur < 0.5f) {
+                if (!prompt_past.empty() && t_cur < 0.5f && params.n_max_text_ctx > 0) {
                    int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

                    prompt = { whisper_token_prev(ctx) };
@@ -3535,11 +3535,11 @@ int whisper_full(
                prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end());

                // print the prompt
-                //WHISPER_PRINT_DEBUG("\n\n");
-                //for (int i = 0; i < (int) prompt.size(); i++) {
-                //    WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
-                //}
-                //WHISPER_PRINT_DEBUG("\n\n");
+                WHISPER_PRINT_DEBUG("\n\n");
+                for (int i = 0; i < (int) prompt.size(); i++) {
+                    WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
+                }
+                WHISPER_PRINT_DEBUG("\n\n");

                if (!whisper_decode(*ctx, ctx->decoders[0], prompt.data(), prompt.size(), 0, params.n_threads)) {
                    fprintf(stderr, "%s: failed to decode\n", __func__);