From 4e0e2520f08a3197ed36834245f4dd5475981b72 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 25 Jan 2023 21:48:12 +0200
Subject: [PATCH] whisper : fix FF + remove it from README

---
 README.md   | 2 +-
 whisper.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 21f0b1c..0aa0717 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - AVX intrinsics support for x86 architectures
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
-- Low memory usage (Flash Attention + Flash Forward)
+- Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
diff --git a/whisper.cpp b/whisper.cpp
index 21e559f..3c73125 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -619,6 +619,7 @@ struct whisper_context {
         buf_last = i;
 #else
         (void) i;
+        (void) ctx;
 #endif
     }
 
@@ -1631,7 +1632,7 @@ static bool whisper_encode(
             wctx.use_buf(ctx0, 0);
 
             cur = ggml_flash_ff(ctx0,
-                    ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, N)),
+                    ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, n_ctx)),
                     layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
 #else
             wctx.use_buf(ctx0, 0);