diff --git a/examples/whisper/main.cpp b/examples/whisper/main.cpp index 5362d4a..43838cf 100644 --- a/examples/whisper/main.cpp +++ b/examples/whisper/main.cpp @@ -14,13 +14,16 @@ // 500 -> 00:05.000 // 6000 -> 01:00.000 std::string to_timestamp(int64_t t) { - int64_t sec = t/100; - int64_t msec = t - sec*100; - int64_t min = sec/60; - sec = sec - min*60; - + int64_t msec = t * 10; + int64_t hr = msec / (1000 * 60 * 60); + msec = msec - hr * (1000 * 60 * 60); + int64_t min = msec / (1000 * 60); + msec = msec - min * (1000 * 60); + int64_t sec = msec / 1000; + msec = msec - sec * 1000; + char buf[32]; - snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec); + snprintf(buf, sizeof(buf), "%02d:%02d:%02d.%03d", (int) hr, (int) min, (int) sec, (int) msec); return std::string(buf); } diff --git a/examples/whisper/whisper.cpp b/examples/whisper/whisper.cpp index 81da469..a5f79d2 100644 --- a/examples/whisper/whisper.cpp +++ b/examples/whisper/whisper.cpp @@ -4,6 +4,7 @@ #include #include +#define _USE_MATH_DEFINES #include #include #include @@ -2072,6 +2073,8 @@ bool log_mel_spectrogram( // struct whisper_context * whisper_init(const char * path_model) { + ggml_time_init(); + whisper_context * ctx = new whisper_context; const int64_t t_start_us = ggml_time_us(); @@ -2259,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat switch (strategy) { case WHISPER_DECODE_GREEDY: { - result = (struct whisper_full_params) { + result = { .strategy = WHISPER_DECODE_GREEDY, .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()), .offset_ms = 0, @@ -2280,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat } break; case WHISPER_DECODE_BEAM_SEARCH: { - result = (struct whisper_full_params) { + result = { .strategy = WHISPER_DECODE_GREEDY, .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()), .offset_ms = 0, @@ -2317,6 +2320,13 @@ int whisper_full( return -1; } + // if length of spectrogram is less than 1s (100 samples), then return + // basically don't process anything that is less than 1s + // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39 + if (whisper_n_len(ctx) < 100) { + return 0; + } + // the accumulated text context so far auto & prompt_past = ctx->prompt_past; if (params.no_context) { @@ -2386,7 +2396,7 @@ int whisper_full( // print the prompt //printf("\n\n"); //for (int i = 0; i < prompt.size(); i++) { - // printf("%s: prompt[%d] = %s\n", __func__, i, vocab.id_to_token[prompt[i]].c_str()); + // printf("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token[prompt[i]].c_str()); //} //printf("\n\n"); diff --git a/examples/whisper/whisper.h b/examples/whisper/whisper.h index f462370..381afd7 100644 --- a/examples/whisper/whisper.h +++ b/examples/whisper/whisper.h @@ -2,6 +2,7 @@ #define WHISPER_H #include +#include #ifdef WHISPER_SHARED # ifdef _WIN32 diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 5b7b258..34f104b 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -136,6 +136,7 @@ struct ggml_init_params { void * mem_buffer; // if NULL, memory will be allocated internally }; +void ggml_time_init(void); int64_t ggml_time_ms(void); int64_t ggml_time_us(void); int64_t ggml_cycles(void); diff --git a/src/ggml.c b/src/ggml.c index a87e8db..6608300 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -1,6 +1,11 @@ #include "ggml.h" +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#else #include +#endif + #include #include #include @@ -8,9 +13,15 @@ #include #include #include -#include + +#if defined _MSC_VER +#include "msvc_thread_atomic.h" +#else #include +#include +typedef void* thread_ret_t; +#endif #define GGML_DEBUG 0 @@ -144,6 +155,25 @@ static ggml_fp16_t table_exp_f16[1 << 16]; // timing // +#if defined(_MSC_VER) +static int64_t timer_freq; +void ggml_time_init(void) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + timer_freq = frequency.QuadPart; +} +int64_t ggml_time_ms(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return (t.QuadPart * 1000) / timer_freq; +} +int64_t ggml_time_us(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return (t.QuadPart * 1000000) / timer_freq; +} +#else +void ggml_time_init(void) {} int64_t ggml_time_ms(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); @@ -155,6 +185,7 @@ int64_t ggml_time_us(void) { clock_gettime(CLOCK_MONOTONIC, &ts); return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; } +#endif int64_t ggml_cycles(void) { return clock(); @@ -6407,7 +6438,7 @@ void * ggml_graph_compute_one(void * data) { return NULL; } -void * ggml_graph_compute_thread(void * data) { +thread_ret_t ggml_graph_compute_thread(void * data) { struct ggml_compute_state * state = (struct ggml_compute_state *) data; const int n_threads = state->shared->n_threads; @@ -6418,7 +6449,7 @@ void * ggml_graph_compute_thread(void * data) { } else { while (atomic_load(&state->shared->has_work)) { if (atomic_load(&state->shared->stop)) { - return NULL; + return 0; } ggml_lock_lock (&state->shared->spin); ggml_lock_unlock(&state->shared->spin); @@ -6430,7 +6461,7 @@ void * ggml_graph_compute_thread(void * data) { // wait for work while (!atomic_load(&state->shared->has_work)) { if (atomic_load(&state->shared->stop)) { - return NULL; + return 0; } ggml_lock_lock (&state->shared->spin); ggml_lock_unlock(&state->shared->spin); @@ -6449,7 +6480,7 @@ void * ggml_graph_compute_thread(void * data) { } } - return NULL; + return 0; } void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {