Commit 56f9f71

llm function calling v0

1 parent 8f62c9f

1 file changed: +6 -0 lines changed

src/inferencesh/models/llm.py

Lines changed: 6 additions & 0 deletions
@@ -341,6 +341,9 @@ def clean_text(self, text: str) -> str:
         Returns:
             Cleaned text with common and model-specific tokens removed
         """
+        if text is None:
+            return ""
+
         # Common token cleaning across most models
         cleaned = (text.replace("<|im_end|>", "")
                    .replace("<|im_start|>", "")
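The guard changes the contract of clean_text for None input. A minimal hedged sketch of the effect (`model` is an assumed instance of the class defining clean_text; the second check assumes no normalization beyond the token stripping shown above):

# Hypothetical usage sketch; `model` is assumed to be an instance of the
# class that defines clean_text in src/inferencesh/models/llm.py.
assert model.clean_text(None) == ""  # new: None short-circuits to ""
# (previously, text.replace(...) would raise AttributeError on None)
assert model.clean_text("hi<|im_end|>") == "hi"  # token cleaning unchanged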
@@ -471,6 +474,7 @@ def stream_generate(
         top_p: float = 0.95,
         max_tokens: int = 4096,
         stop: Optional[List[str]] = None,
+        verbose: bool = False,
     ) -> Generator[LLMOutput, None, None]:
         """Stream generate from LLaMA.cpp model with timing and usage tracking."""
         with timing_context() as timing:
@@ -498,6 +502,8 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)
 
             for chunk in completion:
+                if verbose:
+                    print(chunk)
                 # Mark first token time as soon as we get any response
                 if not timing.first_token_time:
                     timing.mark_first_token()
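For illustration, a hedged sketch of driving the new flag from caller code. Only top_p, max_tokens, stop, and verbose come from this diff; everything else (the LLM constructor, the messages argument, the model path) is an assumption about the surrounding API, not confirmed by this commit:

# Hypothetical usage; names other than the diff's keyword arguments are
# assumptions about the wrapper class in src/inferencesh/models/llm.py.
llm = LLM("path/to/model.gguf")  # assumed wrapper exposing stream_generate
messages = [{"role": "user", "content": "What's the weather in Paris?"}]

for output in llm.stream_generate(
    messages,
    top_p=0.95,
    max_tokens=4096,
    stop=None,
    verbose=True,  # new flag: echo each raw llama.cpp chunk for debugging
):
    print(output)  # each item is an LLMOutput, yielded as tokens arrive

Printing the raw chunks before any parsing is a cheap way to inspect the model's function-calling output while the feature is at v0; with verbose left at its False default, existing callers are unaffected.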
