# MSVC build support for whisper.cpp / ggml.
#
# (Free text before the first "diff --git" header is ignored by `git apply`
# and `patch`.)
#
# What this patch does
#   * CMakeLists.txt: the GCC/Clang-style flags (-fsanitize=..., the -W...
#     warning set, -Werror=vla, -mavx/-mavx2/-mfma/-mf16c) are only added when
#     the compiler is not MSVC; MSVC gets /arch:AVX2 and
#     /D_CRT_SECURE_NO_WARNINGS=1 instead. CMAKE_CXX_STANDARD goes from 11 to
#     20 - presumably because `result = { .field = ... }` in whisper.cpp needs
#     C++20 designated initializers once the compound-literal cast is removed.
#   * ggml.c / ggml.h: adds ggml_time_init() plus a QueryPerformanceCounter
#     based ggml_time_ms()/ggml_time_us() for MSVC, and makes the worker thread
#     entry point return thread_ret_t (void* with pthreads, DWORD on MSVC).
#   * msvc_thread_atomic.h (new file): Win32 stand-ins for the atomic_* and
#     pthread_* calls defined in it.
#   * whisper.cpp: calls ggml_time_init() at the top of whisper_init() and
#     drops the `(struct whisper_full_params)` compound-literal casts.
#
# Review fixes folded into this revision
#   * pthread_create() stored the new thread handle in its local parameter
#     (`out = ...`) instead of `*out`, so the caller never received the handle,
#     and it returned non-zero on success. It now writes `*out` and returns 0
#     on success / EAGAIN on failure, matching the pthread convention.
#   * pthread_join() now closes the thread handle after a successful wait
#     instead of leaking it.
#   * ggml_time_ms()/ggml_time_us() (MSVC) scaled the raw tick count before
#     dividing, which overflows int64_t on long-running machines; the value is
#     now computed as whole seconds plus remainder (same result, no overflow).
#
# NOTE(review)
#   * This patch was rebuilt from a copy whose newlines and whitespace runs had
#     been collapsed and whose `#include <...>` targets had been stripped. The
#     header names below are inferred from what the code uses, and blank
#     context lines are placed according to the hunk line counts - confirm
#     against the tree. Column alignment inside context lines is not
#     recoverable, so apply with `git apply --ignore-whitespace` or `patch -l`
#     if an exact match fails.
#   * The `index` lines are kept from the original patch; the post-image hashes
#     of ggml.c and msvc_thread_atomic.h no longer describe the content here.
#   * The WHISPER_DECODE_BEAM_SEARCH case still sets
#     `.strategy = WHISPER_DECODE_GREEDY` (unchanged context line). Looks like
#     a copy/paste slip - confirm and fix in a separate change.
#   * ggml_graph_compute_thread has no WINAPI calling convention; that is
#     harmless on x64 but should be confirmed before building for 32-bit x86.
#
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e494e7c..bb32dea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,19 +26,21 @@ option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
 
 # sanitizers
 
-if (WHISPER_SANITIZE_THREAD)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
-endif()
+if (NOT MSVC)
+    if (WHISPER_SANITIZE_THREAD)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+    endif()
 
-if (WHISPER_SANITIZE_ADDRESS)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
-endif()
+    if (WHISPER_SANITIZE_ADDRESS)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    endif()
 
-if (WHISPER_SANITIZE_UNDEFINED)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+    if (WHISPER_SANITIZE_UNDEFINED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+    endif()
 endif()
 
 #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
@@ -47,7 +49,7 @@ endif()
 # dependencies
 
 set(CMAKE_C_STANDARD 11)
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 20)
 
 find_package(Threads REQUIRED)
 
@@ -69,7 +71,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 endif ()
 
 if (WHISPER_ALL_WARNINGS)
-    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+    if (NOT MSVC)
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
             -Wall \
             -Wextra \
@@ -80,12 +82,14 @@ if (WHISPER_ALL_WARNINGS)
             -Wpointer-arith \
         ")
     else()
-        # todo : windows
+        # todo : msvc
     endif()
 endif()
 
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
-#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
+if (NOT MSVC)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
+endif()
 
 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
 
@@ -93,7 +97,11 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
     message(STATUS "ARM detected")
 else()
     message(STATUS "x86 detected")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
+    if (MSVC)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
+    else()
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
+    endif()
 endif()
 
 # whisper - this is the main library of the project
diff --git a/ggml.c b/ggml.c
index d2853eb..6608300 100644
--- a/ggml.c
+++ b/ggml.c
@@ -13,9 +13,15 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdio.h>
-#include <stdatomic.h>
+
+#if defined _MSC_VER
+#include "msvc_thread_atomic.h"
+#else
 
 #include <pthread.h>
+#include <stdatomic.h>
+typedef void* thread_ret_t;
+#endif
 
 #define GGML_DEBUG 0
 
@@ -149,6 +155,27 @@ static ggml_fp16_t table_exp_f16[1 << 16];
 // timing
 //
 
+#if defined(_MSC_VER)
+static int64_t timer_freq;
+void ggml_time_init(void) {
+    LARGE_INTEGER frequency;
+    QueryPerformanceFrequency(&frequency);
+    timer_freq = frequency.QuadPart;
+}
+// The tick count is split into whole seconds plus a remainder before scaling, so
+// the multiplication cannot overflow int64_t on machines with a long uptime.
+int64_t ggml_time_ms(void) {
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart / timer_freq) * 1000 + ((t.QuadPart % timer_freq) * 1000) / timer_freq;
+}
+int64_t ggml_time_us(void) {
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart / timer_freq) * 1000000 + ((t.QuadPart % timer_freq) * 1000000) / timer_freq;
+}
+#else
+void ggml_time_init(void) {}
 int64_t ggml_time_ms(void) {
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -160,6 +187,7 @@ int64_t ggml_time_us(void) {
     clock_gettime(CLOCK_MONOTONIC, &ts);
     return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
 }
+#endif
 
 int64_t ggml_cycles(void) {
     return clock();
@@ -6412,7 +6440,7 @@ void * ggml_graph_compute_one(void * data) {
     return NULL;
 }
 
-void * ggml_graph_compute_thread(void * data) {
+thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
 
     const int n_threads = state->shared->n_threads;
@@ -6423,7 +6451,7 @@ void * ggml_graph_compute_thread(void * data) {
         } else {
             while (atomic_load(&state->shared->has_work)) {
                 if (atomic_load(&state->shared->stop)) {
-                    return NULL;
+                    return 0;
                 }
                 ggml_lock_lock  (&state->shared->spin);
                 ggml_lock_unlock(&state->shared->spin);
@@ -6435,7 +6463,7 @@ void * ggml_graph_compute_thread(void * data) {
         // wait for work
         while (!atomic_load(&state->shared->has_work)) {
             if (atomic_load(&state->shared->stop)) {
-                return NULL;
+                return 0;
             }
             ggml_lock_lock  (&state->shared->spin);
             ggml_lock_unlock(&state->shared->spin);
@@ -6454,7 +6482,7 @@ void * ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return NULL;
+    return 0;
 }
 
 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
diff --git a/ggml.h b/ggml.h
index 5b7b258..34f104b 100644
--- a/ggml.h
+++ b/ggml.h
@@ -136,6 +136,7 @@ struct ggml_init_params {
     void * mem_buffer; // if NULL, memory will be allocated internally
 };
 
+void ggml_time_init(void);
 int64_t ggml_time_ms(void);
 int64_t ggml_time_us(void);
 int64_t ggml_cycles(void);
diff --git a/msvc_thread_atomic.h b/msvc_thread_atomic.h
new file mode 100644
index 0000000..52cd419
--- /dev/null
+++ b/msvc_thread_atomic.h
@@ -0,0 +1,53 @@
+#pragma once
+// Minimal stand-ins for the C11 atomics and pthread calls, built on Win32
+// primitives. Only the handful of operations defined below are provided.
+#include <windows.h>
+#include <errno.h>
+
+typedef volatile LONG atomic_int;
+typedef atomic_int atomic_bool;
+
+static void atomic_store(atomic_int* ptr, LONG val) {
+    InterlockedExchange(ptr, val);
+}
+static LONG atomic_load(atomic_int* ptr) {
+    // exchanging 0 for 0 never modifies *ptr; the call is used only for its atomic read
+    return InterlockedCompareExchange(ptr, 0, 0);
+}
+static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
+    return InterlockedExchangeAdd(ptr, inc);
+}
+static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
+    return atomic_fetch_add(ptr, -(dec));
+}
+
+typedef HANDLE pthread_t;
+
+typedef DWORD thread_ret_t;
+
+// Starts func(arg) on a new thread and stores the thread handle in *out.
+// Returns 0 on success and EAGAIN if the thread could not be created.
+static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
+    HANDLE handle = CreateThread(NULL, 0, func, arg, 0, NULL);
+    (void) unused;
+    if (handle == NULL) {
+        return EAGAIN;
+    }
+
+    *out = handle;
+    return 0;
+}
+
+// Blocks until the thread has finished, then releases its handle.
+// Returns 0 on success, otherwise the WaitForSingleObject result.
+static int pthread_join(pthread_t thread, void* unused) {
+    const DWORD rc = WaitForSingleObject(thread, INFINITE);
+    (void) unused;
+    if (rc != WAIT_OBJECT_0) {
+        return (int) rc;
+    }
+
+    CloseHandle(thread);
+    return 0;
+}
+
diff --git a/whisper.cpp b/whisper.cpp
index e1eae67..91bac33 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2073,6 +2073,8 @@ bool log_mel_spectrogram(
 //
 
 struct whisper_context * whisper_init(const char * path_model) {
+    ggml_time_init();
+
     whisper_context * ctx = new whisper_context;
 
     const int64_t t_start_us = ggml_time_us();
@@ -2260,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
     switch (strategy) {
         case WHISPER_DECODE_GREEDY:
             {
-                result = (struct whisper_full_params) {
+                result = {
                     .strategy = WHISPER_DECODE_GREEDY,
                     .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                     .offset_ms = 0,
@@ -2281,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
             } break;
         case WHISPER_DECODE_BEAM_SEARCH:
             {
-                result = (struct whisper_full_params) {
+                result = {
                     .strategy = WHISPER_DECODE_GREEDY,
                     .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                     .offset_ms = 0,