|
|
|
@ -234,6 +234,7 @@ int main(int argc, char ** argv) {
|
|
|
|
|
std::vector<float> pcmf32(n_samples_30s, 0.0f);
|
|
|
|
|
std::vector<float> pcmf32_old;
|
|
|
|
|
|
|
|
|
|
std::vector<whisper_token> prompt_tokens;
|
|
|
|
|
const int n_new_line = params.length_ms / params.step_ms - 1;
|
|
|
|
|
|
|
|
|
|
// print some info about the processing
|
|
|
|
@ -344,6 +345,8 @@ int main(int argc, char ** argv) {
|
|
|
|
|
wparams.audio_ctx = params.audio_ctx;
|
|
|
|
|
wparams.speed_up = params.speed_up;
|
|
|
|
|
|
|
|
|
|
wparams.prompt_tokens = &prompt_tokens;
|
|
|
|
|
|
|
|
|
|
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
|
|
|
|
fprintf(stderr, "%s: failed to process audio\n", argv[0]);
|
|
|
|
|
return 6;
|
|
|
|
@ -393,6 +396,16 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
|
|
// keep part of the audio for next iteration to try to mitigate word boundary issues
|
|
|
|
|
pcmf32_old = std::vector<float>(pcmf32.end() - n_samples_keep, pcmf32.end());
|
|
|
|
|
|
|
|
|
|
// Collect the tokens of all segments from the last full-length pass to use as the prompt
|
|
|
|
|
prompt_tokens.clear();
|
|
|
|
|
const int n_segments = whisper_full_n_segments(ctx);
|
|
|
|
|
for (int i = 0; i < n_segments; ++i) {
|
|
|
|
|
const int token_count = whisper_full_n_tokens(ctx, i);
|
|
|
|
|
for (int j = 0; j < token_count; ++j) {
|
|
|
|
|
prompt_tokens.push_back(whisper_full_get_token_id(ctx, i, j));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|