Add VAD (voice activity detection) to save inference cycles

pull/24/head
Niranjan Yadla 2 years ago
parent 167324584b
commit b211d2f9b1

@ -17,6 +17,34 @@
#include <string>
#include <thread>
#include <vector>
#include <math.h>
// ---------------------------------------------------------------------------
// VAD tuning constants.
// NOTE(review): none of these are referenced by the vad() implementation
// visible in this file; presumably reserved for an adaptive-threshold
// variant - confirm before relying on them.
// ---------------------------------------------------------------------------
constexpr double MIN_ENERGY = 0.9;
constexpr double INCREASE_FACTOR = 0.0008;
constexpr double FRAME_MARGIN = 5;

// ---------------------------------------------------------------------------
// VAD bookkeeping shared with the caller of vad().
// All of it starts at zero (spelled out here rather than relying on implicit
// zero-initialization of objects with static storage duration).
// TODO: group this state into a struct.
// ---------------------------------------------------------------------------
size_t n_frame = 0;               // frame index handed to vad() by the caller
double emin = 0.0;                // passed to vad() by address
double emax = 0.0;                // passed to vad() by address
double delta = 0.0;               // passed to vad() by address
size_t margin_frame_counter = 0;  // passed to vad() by address
// Computes the root-mean-square (RMS) energy of one audio frame.
//
// Parameters:
//   buffer        - samples of the frame; may be null (treated as an empty frame)
//   packet_length - number of samples in buffer
//   n_frame, mf_counter, emin, emax, delta
//                 - part of the signature for caller-side VAD state, but this
//                   implementation neither reads nor writes them
//
// Returns sqrt(sum(sample^2) / packet_length). An empty or null frame yields
// 0.0 instead of the NaN a 0/0 division would produce, so a caller's
// "energy > threshold" comparison stays well-defined.
//
// Side effect: prints the computed energy to stdout on every call.
double vad(float *buffer, size_t packet_length, size_t n_frame, size_t *mf_counter, double *emin, double *emax, double *delta)
{
    // state parameters are not used yet; silence unused-parameter warnings
    (void)n_frame;
    (void)mf_counter;
    (void)emin;
    (void)emax;
    (void)delta;

    double current_energy = 0.0;

    if (buffer != nullptr && packet_length > 0)
    {
        // accumulate the sum of squares in double precision; a plain multiply
        // is exact for float inputs and avoids a general-purpose pow() call
        double sum_of_squares = 0.0;
        for (size_t i = 0; i < packet_length; ++i)
        {
            const double sample = static_cast<double>(buffer[i]);
            sum_of_squares += sample * sample;
        }
        current_energy = sqrt(sum_of_squares / static_cast<double>(packet_length));
    }

    printf("\ncurrent energy: %f\n", current_energy);
    return current_energy;
}
// 500 -> 00:05.000
// 6000 -> 01:00.000
@ -241,8 +269,8 @@ int main(int argc, char ** argv) {
while (SDL_GetQueuedAudioSize(g_dev_id_in) < 3*WHISPER_SAMPLE_RATE*sizeof(float)) {
SDL_Delay(1);
}
const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
// take one second from previous iteration
// TODO: better strategy
const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_30s/30 - n_samples_new));
@ -259,8 +287,10 @@ int main(int argc, char ** argv) {
pcmf32_old = pcmf32;
// run the inference
{
double vad_val;
vad_val = vad(pcmf32.data(), pcmf32.size(), n_frame++, &margin_frame_counter, &emin, &emax, &delta);
if (vad_val > 0.2f) // if vad says it's a voice packet...
{ // run the inference
whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
wparams.print_progress = false;

Loading…
Cancel
Save