From 8841840226e6368dbcb26f99152a2c213280ab76 Mon Sep 17 00:00:00 2001 From: Sandro Hanea Date: Mon, 13 Feb 2023 15:19:38 +0100 Subject: [PATCH] Fixed node.addon + one warning which I introduced. --- examples/addon.node/addon.cpp | 27 +++++++++++++++------------ whisper.cpp | 5 ++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/examples/addon.node/addon.cpp b/examples/addon.node/addon.cpp index 57c3514..8f880c4 100644 --- a/examples/addon.node/addon.cpp +++ b/examples/addon.node/addon.cpp @@ -74,11 +74,11 @@ int timestamp_to_sample(int64_t t, int n_samples) { return std::max(0, std::min((int) n_samples - 1, (int) ((t*WHISPER_SAMPLE_RATE)/100))); } -void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, void * user_data) { +void whisper_print_segment_callback(struct whisper_context * /*ctx*/, struct whisper_state * state, int n_new, void * user_data) { const auto & params = *((whisper_print_user_data *) user_data)->params; const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s; - const int n_segments = whisper_full_n_segments(ctx); + const int n_segments = whisper_full_n_segments(state); std::string speaker = ""; @@ -94,8 +94,8 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi for (int i = s0; i < n_segments; i++) { if (!params.no_timestamps || params.diarize) { - t0 = whisper_full_get_segment_t0(ctx, i); - t1 = whisper_full_get_segment_t1(ctx, i); + t0 = whisper_full_get_segment_t0(state, i); + t1 = whisper_full_get_segment_t1(state, i); } if (!params.no_timestamps) { @@ -129,7 +129,7 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi // colorful print bug // - const char * text = whisper_full_get_segment_text(ctx, i); + const char * text = whisper_full_get_segment_text(state, i); printf("%s%s", speaker.c_str(), text); @@ -158,6 +158,8 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) { struct whisper_context * ctx = 
whisper_init_from_file(params.model.c_str()); + struct whisper_state * state = whisper_init_state(ctx); + if (ctx == nullptr) { fprintf(stderr, "error: failed to initialize whisper context\n"); return 3; @@ -340,33 +342,34 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) { { static bool is_aborted = false; // NOTE: this should be atomic to avoid data race - wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) { + wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/ , void * user_data) { bool is_aborted = *(bool*)user_data; return !is_aborted; }; wparams.encoder_begin_callback_user_data = &is_aborted; } - if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) { + if (whisper_full_parallel(ctx, state, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) { fprintf(stderr, "failed to process audio\n"); return 10; } } } - const int n_segments = whisper_full_n_segments(ctx); + const int n_segments = whisper_full_n_segments(state); result.resize(n_segments); for (int i = 0; i < n_segments; ++i) { - const char * text = whisper_full_get_segment_text(ctx, i); - const int64_t t0 = whisper_full_get_segment_t0(ctx, i); - const int64_t t1 = whisper_full_get_segment_t1(ctx, i); + const char * text = whisper_full_get_segment_text(state, i); + const int64_t t0 = whisper_full_get_segment_t0(state, i); + const int64_t t1 = whisper_full_get_segment_t1(state, i); result[i].emplace_back(to_timestamp(t0, true)); result[i].emplace_back(to_timestamp(t1, true)); result[i].emplace_back(text); } - whisper_print_timings(ctx); + whisper_print_timings(ctx, state); + whisper_free_state(state); whisper_free(ctx); return 0; diff --git a/whisper.cpp b/whisper.cpp index cb67dc9..83b1ce3 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2278,7 +2278,6 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) { // ref: 
https://github.com/openai/whisper/blob/main/whisper/audio.py#L92-L124 static bool log_mel_spectrogram( - whisper_context & wctx, whisper_state & wstate, const float * samples, const int n_samples, @@ -2633,7 +2632,7 @@ void whisper_free(struct whisper_context * ctx) { } int whisper_pcm_to_mel(struct whisper_context * ctx, struct whisper_state * state, const float * samples, int n_samples, int n_threads) { - if (!log_mel_spectrogram(*ctx, *state, samples, n_samples, WHISPER_SAMPLE_RATE, WHISPER_N_FFT, WHISPER_HOP_LENGTH, WHISPER_N_MEL, n_threads, ctx->model.filters, false, state->mel)) { + if (!log_mel_spectrogram(*state, samples, n_samples, WHISPER_SAMPLE_RATE, WHISPER_N_FFT, WHISPER_HOP_LENGTH, WHISPER_N_MEL, n_threads, ctx->model.filters, false, state->mel)) { fprintf(stderr, "%s: failed to compute mel spectrogram\n", __func__); return -1; } @@ -2643,7 +2642,7 @@ int whisper_pcm_to_mel(struct whisper_context * ctx, struct whisper_state * stat // same as whisper_pcm_to_mel, but applies a Phase Vocoder to speed up the audio x2 int whisper_pcm_to_mel_phase_vocoder(struct whisper_context * ctx, struct whisper_state * state, const float * samples, int n_samples, int n_threads) { - if (!log_mel_spectrogram(*ctx, *state, samples, n_samples, WHISPER_SAMPLE_RATE, 2*WHISPER_N_FFT, 2*WHISPER_HOP_LENGTH, WHISPER_N_MEL, n_threads, ctx->model.filters, true, state->mel)) { + if (!log_mel_spectrogram(*state, samples, n_samples, WHISPER_SAMPLE_RATE, 2*WHISPER_N_FFT, 2*WHISPER_HOP_LENGTH, WHISPER_N_MEL, n_threads, ctx->model.filters, true, state->mel)) { fprintf(stderr, "%s: failed to compute mel spectrogram\n", __func__); return -1; }