diff --git a/README.md b/README.md index 21f0b1c..0aa0717 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp - AVX intrinsics support for x86 architectures - VSX intrinsics support for POWER architectures - Mixed F16 / F32 precision -- Low memory usage (Flash Attention + Flash Forward) +- Low memory usage (Flash Attention) - Zero memory allocations at runtime - Runs on the CPU - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h) diff --git a/whisper.cpp b/whisper.cpp index 21e559f..3c73125 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -619,6 +619,7 @@ struct whisper_context { buf_last = i; #else (void) i; + (void) ctx; #endif } @@ -1631,7 +1632,7 @@ static bool whisper_encode( wctx.use_buf(ctx0, 0); cur = ggml_flash_ff(ctx0, - ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, N)), + ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, n_ctx)), layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b); #else wctx.use_buf(ctx0, 0);