|
|
@ -671,7 +671,31 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const auto * probs = whisper_get_probs(ctx);
|
|
|
|
// estimate command probability
|
|
|
|
|
|
|
|
// NOTE: not optimal
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
const auto * logits = whisper_get_logits(ctx);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<float> probs(whisper_n_vocab(ctx), 0.0f);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// compute probs from logits via softmax
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
float max = -1e9;
|
|
|
|
|
|
|
|
for (int i = 0; i < (int) probs.size(); ++i) {
|
|
|
|
|
|
|
|
max = std::max(max, logits[i]);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
float sum = 0.0f;
|
|
|
|
|
|
|
|
for (int i = 0; i < (int) probs.size(); ++i) {
|
|
|
|
|
|
|
|
probs[i] = expf(logits[i] - max);
|
|
|
|
|
|
|
|
sum += probs[i];
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < (int) probs.size(); ++i) {
|
|
|
|
|
|
|
|
probs[i] /= sum;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::pair<float, int>> probs_id;
|
|
|
|
std::vector<std::pair<float, int>> probs_id;
|
|
|
|
|
|
|
|
|
|
|
|
double psum = 0.0;
|
|
|
|
double psum = 0.0;
|
|
|
@ -722,6 +746,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
|
|
|
|
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
|
|
|
|
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
|
|
|
|
fprintf(stdout, "\n");
|
|
|
|
fprintf(stdout, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
audio.clear();
|
|
|
|
audio.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|