diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0233ec4..d5a5606 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,6 +132,12 @@ if (WHISPER_ALL_WARNINGS)
             -Wstrict-prototypes             \
             -Wpointer-arith                 \
         ")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+            -Wall                           \
+            -Wextra                         \
+            -Wpedantic                      \
+            -Wcast-qual                     \
+        ")
     else()
         # todo : msvc
     endif()
diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp
index a503d4c..3ab5077 100644
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@@ -33,7 +33,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     return true;
 }
 
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
diff --git a/examples/command/command.cpp b/examples/command/command.cpp
index 094e5a0..0bee82f 100644
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@@ -81,7 +81,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     return true;
 }
 
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
@@ -387,7 +387,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
     float energy_all  = 0.0f;
     float energy_last = 0.0f;
 
-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
         energy_all += fabsf(pcmf32[i]);
 
         if (i >= n_samples - n_samples_last) {
@@ -594,7 +594,7 @@ int main(int argc, char ** argv) {
             whisper_token tokens[1024];
             allowed_tokens.emplace_back();
 
-            for (int l = 0; l < cmd.size(); ++l) {
+            for (int l = 0; l < (int) cmd.size(); ++l) {
                 // NOTE: very important to add the whitespace !
                 //       the reason is that the first decoded token starts with a whitespace too!
                 std::string ss = std::string(" ") + cmd.substr(0, l + 1);
@@ -843,15 +843,15 @@ int main(int argc, char ** argv) {
 
                 // best command
                 {
+                    const auto t_end = std::chrono::high_resolution_clock::now();
+
                     fprintf(stdout, "\n");
                     fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
                             "\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
-                            (int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count());
+                            (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
                     fprintf(stdout, "\n");
                 }
 
-                const auto t_end = std::chrono::high_resolution_clock::now();
-
                 audio.clear();
             }
         }
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 374f247..b4252d4 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -129,7 +129,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     return true;
 }
 
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
     fprintf(stderr, "\n");
@@ -328,7 +328,7 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
 // karaoke video generation
 // outputs a bash script that uses ffmpeg to generate a video with the subtitles
 // TODO: font parameter adjustments
-bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
+bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) {
     std::ofstream fout(fname);
 
     fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
@@ -377,7 +377,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
             txt_ul = "\\ \\ ";
 
             {
-                int ncnt = 0;
                 for (int k = 0; k < n; ++k) {
                     const auto & token2 = tokens[k];
 
@@ -401,8 +400,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
                             txt_ul += "\\ ";
                         }
                     }
-
-                    ncnt += txt.size();
                 }
 
                 ::replace_all(txt_bg, "'", "\u2019");
@@ -637,7 +634,7 @@ int main(int argc, char ** argv) {
             {
                 static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
 
-                wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
+                wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
                     bool is_aborted = *(bool*)user_data;
                     return !is_aborted;
                 };
diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp
index c8e8b74..4d4abfd 100644
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@@ -90,7 +90,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     return true;
 }
 
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
@@ -391,7 +391,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
     float energy_all  = 0.0f;
     float energy_last = 0.0f;
 
-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
         energy_all += fabsf(pcmf32[i]);
 
         if (i >= n_samples - n_samples_last) {
diff --git a/examples/talk/gpt-2.cpp b/examples/talk/gpt-2.cpp
index c67551d..57ece9b 100644
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@@ -78,7 +78,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
         const float * logits,
         int    top_k,
         double top_p,
-        double temp,
+        double /*temp*/,
         std::mt19937 & rng) {
     int n_logits = vocab.id_to_token.size();
 
@@ -268,7 +268,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
             fin.read((char *) &len, sizeof(len));
 
             word.resize(len);
-            fin.read((char *) word.data(), len);
+            fin.read((char *) &word[0], len);
 
             vocab.token_to_id[word] = i;
             vocab.id_to_token[i] = word;
@@ -884,7 +884,7 @@ std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens)
 
     std::string result;
 
-    for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
+    for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
diff --git a/examples/talk/talk.cpp b/examples/talk/talk.cpp
index 3cd730c..e6fe5c8 100644
--- a/examples/talk/talk.cpp
+++ b/examples/talk/talk.cpp
@@ -79,7 +79,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     return true;
 }
 
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
@@ -397,7 +397,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
     float energy_all  = 0.0f;
     float energy_last = 0.0f;
 
-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
         energy_all += fabsf(pcmf32[i]);
 
         if (i >= n_samples - n_samples_last) {
@@ -541,7 +541,6 @@ int main(int argc, char ** argv) {
     bool force_speak = false;
 
     float prob0 = 0.0f;
-    float prob  = 0.0f;
 
     std::vector<float> pcmf32_cur;
     std::vector<float> pcmf32_prompt;
diff --git a/whisper.cpp b/whisper.cpp
index 2c5e8c2..fcdf47d 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -621,7 +621,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
     const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
 
     size_t ctx_size = 0;
-    size_t ctx_mem_size = 0;
 
     {
         const auto & hparams = model.hparams;
@@ -730,12 +729,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
             ctx_size += n_text_layer*(             n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
         }
 
-        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
-        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
-
-        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
-        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
-
         ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
 
         fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
@@ -2043,7 +2036,7 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) {
 static bool log_mel_spectrogram(
     const float * samples,
     const int n_samples,
-    const int sample_rate,
+    const int /*sample_rate*/,
     const int fft_size,
     const int fft_step,
     const int n_mel,