whisper : fixed Beam Search Strategy and exposed whisper_pcm_to_mel_phase_vocoder (#474)

Co-authored-by: Sandro Hanea <sandrohanea@microsoft.com>
pull/485/head
sandrohanea 1 year ago committed by GitHub
parent 4dd7119deb
commit 2bfe0ebc0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2905,7 +2905,7 @@ const char * whisper_print_system_info(void) {
struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
struct whisper_full_params result = {
/*.strategy =*/ WHISPER_SAMPLING_GREEDY,
/*.strategy =*/ strategy,
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
@ -3829,7 +3829,7 @@ int whisper_full(
auto & cur = beam_candidates[cur_c++];
while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
++cur_c;
}

@ -113,6 +113,16 @@ extern "C" {
int n_samples,
int n_threads);
// Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
// The resulting spectrogram is stored inside the provided whisper context.
//   ctx       - whisper context that receives the resulting spectrogram
//   samples   - RAW PCM audio input
//   n_samples - presumably the number of elements in `samples` (TODO confirm)
//   n_threads - presumably the number of threads used for the conversion (TODO confirm)
// Returns 0 on success
WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
struct whisper_context* ctx,
const float* samples,
int n_samples,
int n_threads);
// This can be used to set a custom log mel spectrogram inside the provided whisper context.
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
// n_mel must be 80

Loading…
Cancel
Save