File tree: 1 file changed, +6 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,9 @@ def clean_text(self, text: str) -> str:
341341 Returns:
342342 Cleaned text with common and model-specific tokens removed
343343 """
344+ if text is None :
345+ return ""
346+
344347 # Common token cleaning across most models
345348 cleaned = (text .replace ("<|im_end|>" , "" )
346349 .replace ("<|im_start|>" , "" )
@@ -471,6 +474,7 @@ def stream_generate(
471474 top_p : float = 0.95 ,
472475 max_tokens : int = 4096 ,
473476 stop : Optional [List [str ]] = None ,
477+ verbose : bool = False ,
474478) -> Generator [LLMOutput , None , None ]:
475479 """Stream generate from LLaMA.cpp model with timing and usage tracking."""
476480 with timing_context () as timing :
@@ -498,6 +502,8 @@ def stream_generate(
498502 completion = model .create_chat_completion (** completion_kwargs )
499503
500504 for chunk in completion :
505+ if verbose :
506+ print (chunk )
501507 # Mark first token time as soon as we get any response
502508 if not timing .first_token_time :
503509 timing .mark_first_token ()
You can’t perform that action at this time.
0 commit comments