From b4bf5dff681fc14c18aeffbd29224027eea26bea Mon Sep 17 00:00:00 2001
From: VJHack
Date: Sun, 27 Oct 2024 22:27:56 -0500
Subject: [PATCH] generate token until context filled

---
 examples/server/server.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a43087a7fe97f9..a4e6be94eea838 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -219,14 +219,15 @@ struct server_slot {
         if (params.n_predict == -1 && global_params.n_predict == -1) {
             return true; // limitless
         }
+        else if (global_params.n_predict == -2) {
+            return true; // generate until context is filled
+        }
 
         n_remaining = -1;
 
         if (params.n_predict != -1) {
             n_remaining = params.n_predict - n_decoded;
-        } else if (global_params.n_predict == -2) {
-            n_remaining = n_ctx - n_past;
-        } else if (global_params.n_predict != -1) {
+        }else if (global_params.n_predict != -1) {
             n_remaining = global_params.n_predict - n_decoded;
         }
 
@@ -1814,6 +1815,12 @@ struct server_context {
                     continue;
                 }
 
+                if (params.n_predict == -2) {
+                    slot.release();
+                    send_final_response(slot);
+                    continue;
+                }
+
                 // Shift context
                 const int n_keep = slot.params.n_keep + add_bos_token;
                 const int n_left = slot.n_past - n_keep;