Fixed whisper_n_len (which was used in some binding) and added whisper_n_len_from_state

2 years ago · 4608a7524e
parent 4533da2787
commit 4608a7524e
2 changed files with 12 additions and 7 deletions
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2899,10 +2899,14 @@ int whisper_lang_auto_detect(
    return whisper_lang_auto_detect_with_state(ctx, ctx->default_state, offset_ms, n_threads, lang_probs);
 }

-int whisper_n_len(struct whisper_state * state) {
+int whisper_n_len_from_state(struct whisper_state * state) {
    return state->mel.n_len;
 }

+int whisper_n_len(struct whisper_context * ctx) {
+    return ctx->default_state->mel.n_len;
+}
+
 int whisper_n_vocab(struct whisper_context * ctx) {
    return ctx->vocab.n_vocab;
 }
@@ -3655,7 +3659,7 @@ int whisper_full_with_state(
    }

    const int seek_start = params.offset_ms/10;
-    const int seek_end = seek_start + (params.duration_ms == 0 ? whisper_n_len(state) : params.duration_ms/10);
+    const int seek_end = seek_start + (params.duration_ms == 0 ? whisper_n_len_from_state(state) : params.duration_ms/10);

    // if length of spectrogram is less than 1s (100 samples), then return
    // basically don't process anything that is less than 1s
--- a/whisper.h
+++ b/whisper.h
@@ -264,11 +264,12 @@ extern "C" {
                               int   n_threads,
                             float * lang_probs);

-    WHISPER_API int whisper_n_len          (struct whisper_context * ctx); // mel length
-    WHISPER_API int whisper_n_vocab        (struct whisper_context * ctx);
-    WHISPER_API int whisper_n_text_ctx     (struct whisper_context * ctx);
-    WHISPER_API int whisper_n_audio_ctx    (struct whisper_context * ctx);
-    WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
+    WHISPER_API int whisper_n_len           (struct whisper_context * ctx); // mel length
+    WHISPER_API int whisper_n_len_from_state(struct whisper_state * state); // mel length
+    WHISPER_API int whisper_n_vocab         (struct whisper_context * ctx);
+    WHISPER_API int whisper_n_text_ctx      (struct whisper_context * ctx);
+    WHISPER_API int whisper_n_audio_ctx     (struct whisper_context * ctx);
+    WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);

    // Token logits obtained from the last call to whisper_decode() for the default state
    // The logits for the last token are stored in the last row