diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 7dd9800..65b06ca 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -654,6 +654,7 @@ int main(int argc, char ** argv) { wparams.greedy.best_of = params.best_of; wparams.beam_search.beam_size = params.beam_size; + wparams.temperature_inc = -1; wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data(); wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size(); diff --git a/examples/stream.wasm/emscripten.cpp b/examples/stream.wasm/emscripten.cpp index e4cdf63..144a14d 100644 --- a/examples/stream.wasm/emscripten.cpp +++ b/examples/stream.wasm/emscripten.cpp @@ -49,6 +49,9 @@ void stream_main(size_t index) { wparams.max_tokens = 32; wparams.audio_ctx = 768; // partial encoder context for better performance + // disable temperature fallback + wparams.temperature_inc = -1.0f; + wparams.language = "en"; printf("stream: using %d threads\n", wparams.n_threads); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 3432cb5..e125170 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -615,9 +615,8 @@ int main(int argc, char ** argv) { wparams.audio_ctx = params.audio_ctx; wparams.speed_up = params.speed_up; - // disable best_of fallback + // disable temperature fallback wparams.temperature_inc = -1.0f; - wparams.greedy.best_of = -1; wparams.prompt_tokens = params.no_context ? nullptr : prompt_tokens.data(); wparams.prompt_n_tokens = params.no_context ? 0 : prompt_tokens.size();