stream : disable temperature fallback

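For real-time processing, we always want a single decoder running at temperature T=0, so the temperature fallback is disabled by setting `temperature_inc` to a negative value.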
pull/291/head
Georgi Gerganov 3 years ago
parent d83e47573b
commit 3fe33d61a2
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

@@ -654,6 +654,7 @@ int main(int argc, char ** argv) {
wparams.greedy.best_of = params.best_of;
wparams.beam_search.beam_size = params.beam_size;
wparams.temperature_inc = -1;
wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size();

@@ -49,6 +49,9 @@ void stream_main(size_t index) {
wparams.max_tokens = 32;
wparams.audio_ctx = 768; // partial encoder context for better performance
// disable temperature fallback
wparams.temperature_inc = -1.0f;
wparams.language = "en";
printf("stream: using %d threads\n", wparams.n_threads);

@@ -615,9 +615,8 @@ int main(int argc, char ** argv) {
wparams.audio_ctx = params.audio_ctx;
wparams.speed_up = params.speed_up;
// disable best_of fallback
// disable temperature fallback
wparams.temperature_inc = -1.0f;
wparams.greedy.best_of = -1;
wparams.prompt_tokens = params.no_context ? nullptr : prompt_tokens.data();
wparams.prompt_n_tokens = params.no_context ? 0 : prompt_tokens.size();

Loading…
Cancel
Save