|
|
@ -824,7 +824,8 @@ int main(int argc, char ** argv) {
|
|
|
|
params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
|
|
|
|
params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
|
|
|
|
|
|
|
|
|
|
|
|
// tokenize the reverse prompt
|
|
|
|
// tokenize the reverse prompt
|
|
|
|
std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);
|
|
|
|
std::vector<gpt_vocab::id> antiprompt_inp;
|
|
|
|
|
|
|
|
processor.Encode(params.antiprompt, &antiprompt_inp);
|
|
|
|
|
|
|
|
|
|
|
|
printf("\n");
|
|
|
|
printf("\n");
|
|
|
|
printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
|
|
|
printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
|
|
@ -951,31 +952,30 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
|
|
|
|
// display text
|
|
|
|
// display text
|
|
|
|
if (!input_noecho) {
|
|
|
|
if (!input_noecho) {
|
|
|
|
|
|
|
|
// check if last token is unprintable token
|
|
|
|
// std::string check = processor.IdToPiece(all_tokens.at(all_tokens.size()-1));
|
|
|
|
std::string check;
|
|
|
|
// printf("[%s]", check.c_str());
|
|
|
|
std::vector<gpt_vocab::id> check_token;
|
|
|
|
// if(check != "<22>") { // ensure a multi-byte token is finished generating before outputting the text
|
|
|
|
check_token.push_back(all_tokens.at(all_tokens.size()-1));
|
|
|
|
// std::string text;
|
|
|
|
processor.Decode(check_token, &check);
|
|
|
|
// processor.Decode(all_tokens, &text);
|
|
|
|
if(check != "<EFBFBD>") {
|
|
|
|
// std::string chunk = text.substr(full_text.length());
|
|
|
|
// If the token is printable, print it; we won't attempt to print unprintable tokens
|
|
|
|
// printf("%s", chunk.c_str());
|
|
|
|
|
|
|
|
// full_text.reserve (text.size());
|
|
|
|
|
|
|
|
// full_text += chunk;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// // reset color to default if there is no pending user input
|
|
|
|
|
|
|
|
// if (params.use_color && embd_inp.size() <= input_consumed) {
|
|
|
|
|
|
|
|
// printf(ANSI_COLOR_RESET);
|
|
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
// fflush(stdout);
|
|
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The code above crashes and is WIP; any help is appreciated
|
|
|
|
|
|
|
|
std::string text;
|
|
|
|
std::string text;
|
|
|
|
processor.Decode(all_tokens, &text);
|
|
|
|
processor.Decode(all_tokens, &text);
|
|
|
|
printf("%s\n", text.c_str());
|
|
|
|
if(full_text.length() < text.length()) {
|
|
|
|
|
|
|
|
std::string chunk = text.substr(full_text.length());
|
|
|
|
|
|
|
|
printf("%s", chunk.c_str());
|
|
|
|
|
|
|
|
full_text.empty();
|
|
|
|
|
|
|
|
processor.Decode(all_tokens, &full_text);
|
|
|
|
|
|
|
|
// reset color to default if there is no pending user input
|
|
|
|
|
|
|
|
if (params.use_color && embd_inp.size() <= input_consumed) {
|
|
|
|
|
|
|
|
printf(ANSI_COLOR_RESET);
|
|
|
|
|
|
|
|
}
|
|
|
|
fflush(stdout);
|
|
|
|
fflush(stdout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// in interactive mode, and not currently processing queued inputs;
|
|
|
|
// in interactive mode, and not currently processing queued inputs;
|
|
|
|
// check if we should prompt the user for more
|
|
|
|
// check if we should prompt the user for more
|
|
|
|
if (params.interactive && embd_inp.size() <= input_consumed) {
|
|
|
|
if (params.interactive && embd_inp.size() <= input_consumed) {
|
|
|
|