From 3fe33d61a20ec89a50976df30431ad49c716b966 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 15 Jan 2023 11:07:52 +0200 Subject: [PATCH] stream : disable temperature fallback For real-time processing, we always want a single decoder running at T=0 --- examples/main/main.cpp | 1 + examples/stream.wasm/emscripten.cpp | 3 +++ examples/stream/stream.cpp | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 7dd9800..65b06ca 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -654,6 +654,7 @@ int main(int argc, char ** argv) { wparams.greedy.best_of = params.best_of; wparams.beam_search.beam_size = params.beam_size; + wparams.temperature_inc = -1; wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data(); wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size(); diff --git a/examples/stream.wasm/emscripten.cpp b/examples/stream.wasm/emscripten.cpp index e4cdf63..144a14d 100644 --- a/examples/stream.wasm/emscripten.cpp +++ b/examples/stream.wasm/emscripten.cpp @@ -49,6 +49,9 @@ void stream_main(size_t index) { wparams.max_tokens = 32; wparams.audio_ctx = 768; // partial encoder context for better performance + // disable temperature fallback + wparams.temperature_inc = -1.0f; + wparams.language = "en"; printf("stream: using %d threads\n", wparams.n_threads); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 3432cb5..e125170 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -615,9 +615,8 @@ int main(int argc, char ** argv) { wparams.audio_ctx = params.audio_ctx; wparams.speed_up = params.speed_up; - // disable best_of fallback + // disable temperature fallback wparams.temperature_inc = -1.0f; - wparams.greedy.best_of = -1; wparams.prompt_tokens = params.no_context ? nullptr : prompt_tokens.data(); wparams.prompt_n_tokens = params.no_context ? 0 : prompt_tokens.size();