diff --git a/CMakeLists.txt b/CMakeLists.txt index 0233ec4..d5a5606 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,6 +132,12 @@ if (WHISPER_ALL_WARNINGS) -Wstrict-prototypes \ -Wpointer-arith \ ") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ + -Wall \ + -Wextra \ + -Wpedantic \ + -Wcast-qual \ + ") else() # todo : msvc endif() diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index a503d4c..3ab5077 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -33,7 +33,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } -void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { +void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) { fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 094e5a0..0bee82f 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -81,7 +81,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } -void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { +void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) { fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); @@ -387,7 +387,7 @@ bool vad_simple(std::vector & pcmf32, int sample_rate, int last_ms, float float energy_all = 0.0f; float energy_last = 0.0f; - for (size_t i = 0; i < n_samples; i++) { + for (int i = 0; i < n_samples; i++) { energy_all += fabsf(pcmf32[i]); if (i >= n_samples - n_samples_last) { @@ -594,7 +594,7 @@ int main(int argc, char ** argv) { whisper_token tokens[1024]; allowed_tokens.emplace_back(); - for (int l = 0; l < cmd.size(); ++l) { + for (int l = 0; l < (int) cmd.size(); ++l) { // NOTE: very important to add the whitespace ! 
// the reason is that the first decoded token starts with a whitespace too! std::string ss = std::string(" ") + cmd.substr(0, l + 1); @@ -843,15 +843,15 @@ int main(int argc, char ** argv) { // best command { + const auto t_end = std::chrono::high_resolution_clock::now(); + fprintf(stdout, "\n"); fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__, "\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first, - (int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count()); + (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count()); fprintf(stdout, "\n"); } - const auto t_end = std::chrono::high_resolution_clock::now(); - audio.clear(); } } diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 374f247..b4252d4 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -129,7 +129,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } -void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { +void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) { fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]); fprintf(stderr, "\n"); @@ -328,7 +328,7 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_ // karaoke video generation // outputs a bash script that uses ffmpeg to generate a video with the subtitles // TODO: font parameter adjustments -bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) { +bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) { std::ofstream fout(fname); fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname); @@ -377,7 +377,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const 
char * f txt_ul = "\\ \\ "; { - int ncnt = 0; for (int k = 0; k < n; ++k) { const auto & token2 = tokens[k]; @@ -401,8 +400,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f txt_ul += "\\ "; } } - - ncnt += txt.size(); } ::replace_all(txt_bg, "'", "\u2019"); @@ -637,7 +634,7 @@ int main(int argc, char ** argv) { { static bool is_aborted = false; // NOTE: this should be atomic to avoid data race - wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) { + wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) { bool is_aborted = *(bool*)user_data; return !is_aborted; }; diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index c8e8b74..4d4abfd 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -90,7 +90,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } -void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { +void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) { fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); @@ -391,7 +391,7 @@ bool vad_simple(std::vector & pcmf32, int sample_rate, int last_ms, float float energy_all = 0.0f; float energy_last = 0.0f; - for (size_t i = 0; i < n_samples; i++) { + for (int i = 0; i < n_samples; i++) { energy_all += fabsf(pcmf32[i]); if (i >= n_samples - n_samples_last) { diff --git a/examples/talk/gpt-2.cpp b/examples/talk/gpt-2.cpp index c67551d..57ece9b 100644 --- a/examples/talk/gpt-2.cpp +++ b/examples/talk/gpt-2.cpp @@ -78,7 +78,7 @@ gpt_vocab::id gpt_sample_top_k_top_p( const float * logits, int top_k, double top_p, - double temp, + double /*temp*/, std::mt19937 & rng) { int n_logits = vocab.id_to_token.size(); @@ -268,7 +268,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & fin.read((char *) &len, 
sizeof(len)); word.resize(len); - fin.read((char *) word.data(), len); + fin.read((char *) &word[0], len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; @@ -884,7 +884,7 @@ std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens) std::string result; - for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) { + for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) { // predict if (embd.size() > 0) { if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) { diff --git a/examples/talk/talk.cpp b/examples/talk/talk.cpp index 3cd730c..e6fe5c8 100644 --- a/examples/talk/talk.cpp +++ b/examples/talk/talk.cpp @@ -79,7 +79,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } -void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { +void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) { fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); @@ -397,7 +397,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float float energy_all = 0.0f; float energy_last = 0.0f; - for (size_t i = 0; i < n_samples; i++) { + for (int i = 0; i < n_samples; i++) { energy_all += fabsf(pcmf32[i]); if (i >= n_samples - n_samples_last) { @@ -541,7 +541,6 @@ int main(int argc, char ** argv) { bool force_speak = false; float prob0 = 0.0f; - float prob = 0.0f; std::vector<float> pcmf32_cur; std::vector<float> pcmf32_prompt; diff --git a/whisper.cpp b/whisper.cpp index 2c5e8c2..fcdf47d 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -621,7 +621,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx const ggml_type wtype = model.hparams.f16 ? 
GGML_TYPE_F16 : GGML_TYPE_F32; size_t ctx_size = 0; - size_t ctx_mem_size = 0; { const auto & hparams = model.hparams; @@ -730,12 +729,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b } - ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k - ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v - - ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k - ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v - ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); @@ -2043,7 +2036,7 @@ static void fft(const std::vector & in, std::vector & out) { static bool log_mel_spectrogram( const float * samples, const int n_samples, - const int sample_rate, + const int /*sample_rate*/, const int fft_size, const int fft_step, const int n_mel,