Add OpenBLAS support

Supported via CMake - just configure with:

cmake .. -DWHISPER_SUPPORT_OPENBLAS=ON

On Ubuntu, you have to install the library like this:

apt install libopenblas-dev
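
For a clean build from the repository root, the usual CMake sequence is something like this (a sketch; adjust paths to taste):

mkdir build && cd build
cmake .. -DWHISPER_SUPPORT_OPENBLAS=ON
make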

Unfortunately, I don't observe any benefit compared to the original
AVX2 + FP16 implementation. Maybe I'm missing something.
pull/107/head
Georgi Gerganov
parent ebb01b9e33
commit fbd513b813

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,8 +41,13 @@ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STAND
 
 option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
 
-option(WHISPER_PERF "whisper: enable perf timings" OFF)
+if (APPLE)
+    option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
+else()
+    option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
+endif()
 
-option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
+option(WHISPER_PERF "whisper: enable perf timings" OFF)
 
 # sanitizers
@@ -86,6 +91,18 @@ if (APPLE AND NOT WHISPER_NO_ACCELERATE)
     endif()
 endif()
 
+if (WHISPER_SUPPORT_OPENBLAS)
+    find_library(OPENBLAS_LIB openblas)
+    if (OPENBLAS_LIB)
+        message(STATUS "OpenBLAS found")
+
+        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${OPENBLAS_LIB})
+        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
+    else()
+        message(WARNING "OpenBLAS not found")
+    endif()
+endif()
+
 # compiler flags
 
 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
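
If OpenBLAS is installed under a non-standard prefix, find_library() will not locate it on its own; pointing CMake at the install via CMAKE_PREFIX_PATH should work (the /opt/OpenBLAS path here is just a placeholder):

cmake .. -DWHISPER_SUPPORT_OPENBLAS=ON -DCMAKE_PREFIX_PATH=/opt/OpenBLAS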

--- a/ggml.c
+++ b/ggml.c
@@ -76,6 +76,8 @@ typedef void* thread_ret_t;
 
 #ifdef GGML_USE_ACCELERATE
 #include <Accelerate/Accelerate.h>
+#elif GGML_USE_OPENBLAS
+#include <cblas.h>
 #endif
 
 // floating point type used to accumulate sums
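
Note that GGML_USE_OPENBLAS is not defined anywhere in the sources themselves - it is injected through WHISPER_EXTRA_FLAGS by the CMake hunk above, so this #elif branch is only taken for builds configured with -DWHISPER_SUPPORT_OPENBLAS=ON. The cblas.h header is provided by libopenblas-dev on Ubuntu.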
@@ -4055,46 +4057,44 @@ void ggml_compute_forward_mul_mat_f32(
     // nb00 < nb01 - src0 is transposed
     //   compute by src0 columns
 
-//#ifdef GGML_USE_ACCELERATE
-//    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
-//        GGML_ASSERT(ggml_is_contiguous(src0));
-//        GGML_ASSERT(nb10 == sizeof(float));
-//
-//        if (params->ith != 0) return;
-//
-//        if (params->type == GGML_TASK_INIT) {
-//            return;
-//        }
-//
-//        if (params->type == GGML_TASK_FINALIZE) {
-//            return;
-//        }
-//
-//        float * const wdata = params->wdata;
-//
-//        for (int i03 = 0; i03 < ne03; i03++) {
-//            for (int i02 = 0; i02 < ne02; i02++) {
-//                const float * x = (float *) (src0->data);
-//                const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
-//
-//                float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
-//
-//                // zT = y * xT
-//                {
-//                    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
-//                            ne11, ne01, ne10,
-//                            1.0f,    y, ne10,
-//                                     x, ne10,
-//                            0.0f,    d, ne01);
-//                }
-//            }
-//        }
-//
-//        //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-//
-//        return;
-//    }
-//#endif
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
+        GGML_ASSERT(ggml_is_contiguous(src0));
+        GGML_ASSERT(nb10 == sizeof(float));
+
+        if (params->ith != 0) return;
+
+        if (params->type == GGML_TASK_INIT) {
+            return;
+        }
+
+        if (params->type == GGML_TASK_FINALIZE) {
+            return;
+        }
+
+        for (int i03 = 0; i03 < ne03; i03++) {
+            for (int i02 = 0; i02 < ne02; i02++) {
+                const float * x = (float *) (src0->data);
+                const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
+
+                float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
+
+                // zT = y * xT
+                {
+                    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+                            ne11, ne01, ne10,
+                            1.0f,    y, ne10,
+                                     x, ne10,
+                            0.0f,    d, ne01);
+                }
+            }
+        }
+
+        //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+
+        return;
+    }
+#endif
 
     if (params->type == GGML_TASK_INIT) {
         if (nb01 >= nb00) {
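
For readers unfamiliar with the call above: cblas_sgemm with CblasRowMajor, CblasNoTrans, CblasTrans computes d = y * xT in a single call, which is what the "zT = y * xT" comment refers to. A minimal standalone sketch of the same call pattern on a 2x3 toy case (assuming OpenBLAS is installed; compile with something like gcc demo.c -lopenblas):

#include <stdio.h>
#include <cblas.h>

int main(void) {
    // d = y * xT, same shapes as the kernel: M rows of y, N rows of x,
    // K shared inner dimension
    const int M = 2, N = 2, K = 3;
    float y[6] = { 1, 2, 3,
                   4, 5, 6 };   // M x K, row-major
    float x[6] = { 1, 0, 1,
                   0, 1, 0 };   // N x K, row-major; sgemm transposes it
    float d[4] = { 0 };         // M x N result

    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
            M, N, K,
            1.0f, y, K,
                  x, K,
            0.0f, d, N);

    printf("%.0f %.0f\n%.0f %.0f\n", d[0], d[1], d[2], d[3]);   // 4 2 / 10 5
    return 0;
}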
@@ -4301,7 +4301,7 @@ void ggml_compute_forward_mul_mat_f16_f32(
     // nb00 < nb01 - src0 is transposed
     //   compute by src0 columns
 
-#ifdef GGML_USE_ACCELERATE
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         GGML_ASSERT(nb10 == sizeof(float));
@@ -6857,7 +6857,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                     } else {
                         if (node->src0->type == GGML_TYPE_F16 &&
                             node->src1->type == GGML_TYPE_F32) {
-#ifdef GGML_USE_ACCELERATE
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
                             if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
                                 cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
                             } else {
@@ -8074,7 +8074,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     return 1;
 #else
     return 0;
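
With this change, an OpenBLAS build is reported through the existing system-info path. A minimal runtime check (a sketch, assuming you compile and link against this ggml.c; ggml_cpu_has_blas() is the same helper whisper_print_system_info reports from):

#include <stdio.h>
#include "ggml.h"

int main(void) {
    // prints 1 when the binary was configured with Accelerate or OpenBLAS
    printf("BLAS = %d\n", ggml_cpu_has_blas());
    return 0;
}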
