sync : whisper.cpp

experiments/blocking
Georgi Gerganov 2 years ago
parent e2f39f4b52
commit 67ac34fcfa
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

@ -14,13 +14,16 @@
// 500 -> 00:05.000 // 500 -> 00:00:05.000
// 6000 -> 01:00.000 // 6000 -> 00:01:00.000
std::string to_timestamp(int64_t t) { std::string to_timestamp(int64_t t) {
int64_t sec = t/100; int64_t msec = t * 10;
int64_t msec = t - sec*100; int64_t hr = msec / (1000 * 60 * 60);
int64_t min = sec/60; msec = msec - hr * (1000 * 60 * 60);
sec = sec - min*60; int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32]; char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec); snprintf(buf, sizeof(buf), "%02d:%02d:%02d.%03d", (int) hr, (int) min, (int) sec, (int) msec);
return std::string(buf); return std::string(buf);
} }

@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#define _USE_MATH_DEFINES
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
@ -2072,6 +2073,8 @@ bool log_mel_spectrogram(
// //
struct whisper_context * whisper_init(const char * path_model) { struct whisper_context * whisper_init(const char * path_model) {
ggml_time_init();
whisper_context * ctx = new whisper_context; whisper_context * ctx = new whisper_context;
const int64_t t_start_us = ggml_time_us(); const int64_t t_start_us = ggml_time_us();
@ -2259,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
switch (strategy) { switch (strategy) {
case WHISPER_DECODE_GREEDY: case WHISPER_DECODE_GREEDY:
{ {
result = (struct whisper_full_params) { result = {
.strategy = WHISPER_DECODE_GREEDY, .strategy = WHISPER_DECODE_GREEDY,
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()), .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
.offset_ms = 0, .offset_ms = 0,
@ -2280,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
} break; } break;
case WHISPER_DECODE_BEAM_SEARCH: case WHISPER_DECODE_BEAM_SEARCH:
{ {
result = (struct whisper_full_params) { result = {
.strategy = WHISPER_DECODE_GREEDY, .strategy = WHISPER_DECODE_GREEDY,
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()), .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
.offset_ms = 0, .offset_ms = 0,
@ -2317,6 +2320,13 @@ int whisper_full(
return -1; return -1;
} }
// if length of spectrogram is less than 1s (100 samples), then return
// basically don't process anything that is less than 1s
// see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
if (whisper_n_len(ctx) < 100) {
return 0;
}
// the accumulated text context so far // the accumulated text context so far
auto & prompt_past = ctx->prompt_past; auto & prompt_past = ctx->prompt_past;
if (params.no_context) { if (params.no_context) {
@ -2386,7 +2396,7 @@ int whisper_full(
// print the prompt // print the prompt
//printf("\n\n"); //printf("\n\n");
//for (int i = 0; i < prompt.size(); i++) { //for (int i = 0; i < prompt.size(); i++) {
// printf("%s: prompt[%d] = %s\n", __func__, i, vocab.id_to_token[prompt[i]].c_str()); // printf("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token[prompt[i]].c_str());
//} //}
//printf("\n\n"); //printf("\n\n");

@ -2,6 +2,7 @@
#define WHISPER_H #define WHISPER_H
#include <stdint.h> #include <stdint.h>
#include <stdbool.h>
#ifdef WHISPER_SHARED #ifdef WHISPER_SHARED
# ifdef _WIN32 # ifdef _WIN32

@ -136,6 +136,7 @@ struct ggml_init_params {
void * mem_buffer; // if NULL, memory will be allocated internally void * mem_buffer; // if NULL, memory will be allocated internally
}; };
void ggml_time_init(void);
int64_t ggml_time_ms(void); int64_t ggml_time_ms(void);
int64_t ggml_time_us(void); int64_t ggml_time_us(void);
int64_t ggml_cycles(void); int64_t ggml_cycles(void);

@ -1,6 +1,11 @@
#include "ggml.h" #include "ggml.h"
#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#else
#include <alloca.h> #include <alloca.h>
#endif
#include <assert.h> #include <assert.h>
#include <time.h> #include <time.h>
#include <math.h> #include <math.h>
@ -8,9 +13,15 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdatomic.h>
#if defined _MSC_VER
#include "msvc_thread_atomic.h"
#else
#include <pthread.h> #include <pthread.h>
#include <stdatomic.h>
typedef void* thread_ret_t;
#endif
#define GGML_DEBUG 0 #define GGML_DEBUG 0
@ -144,6 +155,25 @@ static ggml_fp16_t table_exp_f16[1 << 16];
// timing // timing
// //
#if defined(_MSC_VER)
static int64_t timer_freq;
// Queries the performance-counter frequency once and caches it in timer_freq.
// ggml_time_ms() and ggml_time_us() divide by timer_freq, so this has to run
// before either of them is called.
void ggml_time_init(void) {
    LARGE_INTEGER ticks_per_sec;
    QueryPerformanceFrequency(&ticks_per_sec);

    timer_freq = ticks_per_sec.QuadPart;
}
// Returns the current performance-counter reading converted to milliseconds.
// Relies on timer_freq having been set by ggml_time_init().
int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);

    // Scale the whole seconds and the sub-second remainder separately.
    // Multiplying the raw tick count first (t.QuadPart * 1000) can overflow
    // int64_t when the counter frequency is high and the uptime is long.
    // For non-overflowing inputs the result is identical to the direct form.
    const int64_t whole_sec = t.QuadPart / timer_freq;
    const int64_t rem_ticks = t.QuadPart % timer_freq;

    return whole_sec * 1000 + (rem_ticks * 1000) / timer_freq;
}
// Returns the current performance-counter reading converted to microseconds.
// Relies on timer_freq having been set by ggml_time_init().
int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);

    // Scale the whole seconds and the sub-second remainder separately.
    // Multiplying the raw tick count first (t.QuadPart * 1000000) overflows
    // int64_t once ticks exceed ~9.2e12, i.e. after roughly ten days of uptime
    // with a 10 MHz counter. For non-overflowing inputs the result is identical
    // to the direct form.
    const int64_t whole_sec = t.QuadPart / timer_freq;
    const int64_t rem_ticks = t.QuadPart % timer_freq;

    return whole_sec * 1000000 + (rem_ticks * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}
int64_t ggml_time_ms(void) { int64_t ggml_time_ms(void) {
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
@ -155,6 +185,7 @@ int64_t ggml_time_us(void) {
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
} }
#endif
int64_t ggml_cycles(void) { int64_t ggml_cycles(void) {
return clock(); return clock();
@ -6407,7 +6438,7 @@ void * ggml_graph_compute_one(void * data) {
return NULL; return NULL;
} }
void * ggml_graph_compute_thread(void * data) { thread_ret_t ggml_graph_compute_thread(void * data) {
struct ggml_compute_state * state = (struct ggml_compute_state *) data; struct ggml_compute_state * state = (struct ggml_compute_state *) data;
const int n_threads = state->shared->n_threads; const int n_threads = state->shared->n_threads;
@ -6418,7 +6449,7 @@ void * ggml_graph_compute_thread(void * data) {
} else { } else {
while (atomic_load(&state->shared->has_work)) { while (atomic_load(&state->shared->has_work)) {
if (atomic_load(&state->shared->stop)) { if (atomic_load(&state->shared->stop)) {
return NULL; return 0;
} }
ggml_lock_lock (&state->shared->spin); ggml_lock_lock (&state->shared->spin);
ggml_lock_unlock(&state->shared->spin); ggml_lock_unlock(&state->shared->spin);
@ -6430,7 +6461,7 @@ void * ggml_graph_compute_thread(void * data) {
// wait for work // wait for work
while (!atomic_load(&state->shared->has_work)) { while (!atomic_load(&state->shared->has_work)) {
if (atomic_load(&state->shared->stop)) { if (atomic_load(&state->shared->stop)) {
return NULL; return 0;
} }
ggml_lock_lock (&state->shared->spin); ggml_lock_lock (&state->shared->spin);
ggml_lock_unlock(&state->shared->spin); ggml_lock_unlock(&state->shared->spin);
@ -6449,7 +6480,7 @@ void * ggml_graph_compute_thread(void * data) {
} }
} }
return NULL; return 0;
} }
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {

Loading…
Cancel
Save