sync : whisper.cpp

3 years ago · 67ac34fcfa
parent e2f39f4b52
commit 67ac34fcfa
5 changed files with 60 additions and 14 deletions
--- a/examples/whisper/main.cpp
+++ b/examples/whisper/main.cpp
@ -14,13 +14,16 @@
-//  500 -> 00:05.000
-// 6000 -> 01:00.000
+//  500 -> 00:00:05.000
+// 6000 -> 00:01:00.000
 std::string to_timestamp(int64_t t) {
-    int64_t sec = t/100;
-    int64_t msec = t - sec*100;
-    int64_t min = sec/60;
-    sec = sec - min*60;
+    int64_t msec = t * 10;  // t counts 10 ms units (500 -> 5 s), so this is the total in milliseconds
+    int64_t hr = msec / (1000 * 60 * 60);  // whole hours
+    msec = msec - hr * (1000 * 60 * 60);  // subtract the hours already accounted for
+    int64_t min = msec / (1000 * 60);  // whole minutes
+    msec = msec - min * (1000 * 60);  // subtract the minutes already accounted for
+    int64_t sec = msec / 1000;  // whole seconds
+    msec = msec - sec * 1000;  // what is left is the millisecond field
    
    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+    snprintf(buf, sizeof(buf), "%02d:%02d:%02d.%03d", (int) hr, (int) min, (int) sec, (int) msec);  // hh:mm:ss.mmm

    return std::string(buf);
 }
--- a/examples/whisper/whisper.cpp
+++ b/examples/whisper/whisper.cpp
@ -4,6 +4,7 @@

 #include <algorithm>
 #include <cassert>
+#define _USE_MATH_DEFINES
 #include <cmath>
 #include <cstdio>
 #include <cstring>
@ -2072,6 +2073,8 @@ bool log_mel_spectrogram(
 //

 struct whisper_context * whisper_init(const char * path_model) {
+    ggml_time_init();
+
    whisper_context * ctx = new whisper_context;

    const int64_t t_start_us = ggml_time_us();
@ -2259,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
    switch (strategy) {
        case WHISPER_DECODE_GREEDY:
            {
-                result = (struct whisper_full_params) {
+                result = {
                    .strategy  = WHISPER_DECODE_GREEDY,
                    .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                    .offset_ms = 0,
@ -2280,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
            } break;
        case WHISPER_DECODE_BEAM_SEARCH:
            {
-                result = (struct whisper_full_params) {
+                result = {
                    .strategy  = WHISPER_DECODE_GREEDY,
                    .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                    .offset_ms = 0,
@ -2317,6 +2320,13 @@ int whisper_full(
        return -1;
    }

+    // if length of spectrogram is less than 1s (100 samples), then return
+    // basically don't process anything that is less than 1s
+    // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
+    if (whisper_n_len(ctx) < 100) {
+        return 0;
+    }
+
    // the accumulated text context so far
    auto & prompt_past = ctx->prompt_past;
    if (params.no_context) {
@ -2386,7 +2396,7 @@ int whisper_full(
        // print the prompt
        //printf("\n\n");
        //for (int i = 0; i < prompt.size(); i++) {
-        //    printf("%s: prompt[%d] = %s\n", __func__, i, vocab.id_to_token[prompt[i]].c_str());
+        //    printf("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token[prompt[i]].c_str());
        //}
        //printf("\n\n");

--- a/examples/whisper/whisper.h
+++ b/examples/whisper/whisper.h
@ -2,6 +2,7 @@
 #define WHISPER_H

 #include <stdint.h>
+#include <stdbool.h>

 #ifdef WHISPER_SHARED
 #    ifdef _WIN32
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@ -136,6 +136,7 @@ struct ggml_init_params {
    void * mem_buffer; // if NULL, memory will be allocated internally
 };

+void ggml_time_init(void); // call once before ggml_time_ms()/ggml_time_us(): on MSVC builds they divide by the frequency this records
 int64_t ggml_time_ms(void);
 int64_t ggml_time_us(void);
 int64_t ggml_cycles(void);
--- a/src/ggml.c
+++ b/src/ggml.c
@ -1,6 +1,11 @@
 #include "ggml.h"

+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <malloc.h> // using malloc.h with MSC/MINGW
+#else
 #include <alloca.h>
+#endif
+
 #include <assert.h>
 #include <time.h>
 #include <math.h>
@ -8,9 +13,15 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdio.h>
-#include <stdatomic.h>

+
+#if defined _MSC_VER
+#include "msvc_thread_atomic.h"
+#else
 #include <pthread.h>
+#include <stdatomic.h>
+typedef void* thread_ret_t;
+#endif

 #define GGML_DEBUG 0

@ -144,6 +155,25 @@ static ggml_fp16_t table_exp_f16[1 << 16];
 // timing
 //

+#if defined(_MSC_VER)
+static int64_t timer_freq; // performance-counter ticks per second; set by ggml_time_init()
+void ggml_time_init(void) { // MSVC build: record the counter frequency that ggml_time_ms()/ggml_time_us() divide by
+    LARGE_INTEGER frequency;
+    QueryPerformanceFrequency(&frequency); // reports the counter frequency in counts per second
+    timer_freq = frequency.QuadPart;
+}
+int64_t ggml_time_ms(void) { // MSVC build: performance-counter reading in milliseconds; ggml_time_init() must have run first
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart / timer_freq) * 1000 + ((t.QuadPart % timer_freq) * 1000) / timer_freq; // whole seconds and remainder scaled separately: t.QuadPart * 1000 can overflow int64_t
+}
+int64_t ggml_time_us(void) { // MSVC build: performance-counter reading in microseconds; ggml_time_init() must have run first
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart / timer_freq) * 1000000 + ((t.QuadPart % timer_freq) * 1000000) / timer_freq; // whole seconds and remainder scaled separately: t.QuadPart * 1000000 overflows int64_t after days of uptime
+}
+#else
+void ggml_time_init(void) {} // non-MSVC build: nothing to set up, the clock_gettime()-based timers below keep no state
 int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
@ -155,6 +185,7 @@ int64_t ggml_time_us(void) {
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
 }
+#endif

 int64_t ggml_cycles(void) {
    return clock();
@ -6407,7 +6438,7 @@ void * ggml_graph_compute_one(void * data) {
    return NULL;
 }

-void * ggml_graph_compute_thread(void * data) {
+thread_ret_t ggml_graph_compute_thread(void * data) {
    struct ggml_compute_state * state = (struct ggml_compute_state *) data;

    const int n_threads = state->shared->n_threads;
@ -6418,7 +6449,7 @@ void * ggml_graph_compute_thread(void * data) {
        } else {
            while (atomic_load(&state->shared->has_work)) {
                if (atomic_load(&state->shared->stop)) {
-                    return NULL;
+                    return 0;
                }
                ggml_lock_lock  (&state->shared->spin);
                ggml_lock_unlock(&state->shared->spin);
@ -6430,7 +6461,7 @@ void * ggml_graph_compute_thread(void * data) {
        // wait for work
        while (!atomic_load(&state->shared->has_work)) {
            if (atomic_load(&state->shared->stop)) {
-                return NULL;
+                return 0;
            }
            ggml_lock_lock  (&state->shared->spin);
            ggml_lock_unlock(&state->shared->spin);
@ -6449,7 +6480,7 @@ void * ggml_graph_compute_thread(void * data) {
        }
    }

-    return NULL;
+    return 0;
 }

 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {