Skip to content

Commit c7bb38a

Browse files
committed
llm function calling v0
1 parent 56f9f71 commit c7bb38a

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

src/inferencesh/models/llm.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,16 @@ def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
252252
self._update_tool_calls(delta_content["tool_calls"])
253253
self.finish_reason = delta.get("finish_reason")
254254

255-
# Update timing stats
256-
self.timing_stats = timing.stats
255+
# Update timing stats while preserving tokens_per_second
256+
timing_stats = timing.stats
257+
generation_time = timing_stats["generation_time"]
258+
completion_tokens = self.usage_stats.get("completion_tokens", 0)
259+
tokens_per_second = (completion_tokens / generation_time) if generation_time > 0 and completion_tokens > 0 else 0.0
260+
261+
self.timing_stats.update({
262+
**timing_stats,
263+
"tokens_per_second": tokens_per_second
264+
})
257265

258266
def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
259267
"""Update tool calls, handling both full and partial updates."""

0 commit comments

Comments
 (0)