From 7a853dfd7fc8f5deaea27505b3a3cf123ab0e97e Mon Sep 17 00:00:00 2001
From: Grzegorz Grasza
Date: Mon, 30 Jun 2025 22:34:55 +0200
Subject: [PATCH] New default number of tokens to predict

Change the default number of tokens to predict: -1 is the default in
llama.cpp, so any other value will confuse people.

Closes #264.
---
 run_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/run_inference.py b/run_inference.py
index f3ab727b6..c8e8f5652 100644
--- a/run_inference.py
+++ b/run_inference.py
@@ -45,7 +45,7 @@ def signal_handler(sig, frame):
     # Usage: python run_inference.py -p "Microsoft Corporation is an American multinational corporation and technology company headquartered in Redmond, Washington."
     parser = argparse.ArgumentParser(description='Run inference')
     parser.add_argument("-m", "--model", type=str, help="Path to model file", required=False, default="models/bitnet_b1_58-3B/ggml-model-i2_s.gguf")
-    parser.add_argument("-n", "--n-predict", type=int, help="Number of tokens to predict when generating text", required=False, default=128)
+    parser.add_argument("-n", "--n-predict", type=int, help="Number of tokens to predict when generating text", required=False, default=-1)
     parser.add_argument("-p", "--prompt", type=str, help="Prompt to generate text from", required=True)
     parser.add_argument("-t", "--threads", type=int, help="Number of threads to use", required=False, default=2)
     parser.add_argument("-c", "--ctx-size", type=int, help="Size of the prompt context", required=False, default=2048)
@@ -53,4 +53,4 @@ def signal_handler(sig, frame):
     parser.add_argument("-cnv", "--conversation", action='store_true', help="Whether to enable chat mode or not (for instruct models.)")
 
     args = parser.parse_args()
-    run_inference()
\ No newline at end of file
+    run_inference()
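
Note for reviewers: in llama.cpp, -n -1 means "no fixed limit" --
generation continues until the model emits an end-of-stream token
instead of stopping after a set number of tokens. Below is a minimal
sketch of how the parsed value is expected to reach the backend; the
binary path and the bare-bones forwarding are assumptions for
illustration, not this repository's exact code.

    import argparse
    import subprocess

    parser = argparse.ArgumentParser(description='Run inference')
    # -1 follows the llama.cpp convention: generate with no fixed token limit.
    parser.add_argument("-n", "--n-predict", type=int, required=False, default=-1,
                        help="Number of tokens to predict when generating text")
    parser.add_argument("-p", "--prompt", type=str, required=True,
                        help="Prompt to generate text from")
    args = parser.parse_args()

    # argparse accepts "-n -1" here because no registered option string looks
    # like a negative number, so "-1" is read as the option's value.
    command = ["build/bin/llama-cli",      # assumed path to the llama.cpp CLI
               "-n", str(args.n_predict),  # forwarded unchanged, so -1 passes through
               "-p", args.prompt]
    subprocess.run(command, check=True)

With this default, a bare `python run_inference.py -p "..."` matches the
behavior users already know from invoking llama.cpp directly.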