From e500c71aa9b0bf1e9511cbbeaae2804c39c6be85 Mon Sep 17 00:00:00 2001
From: Maska Chung
Date: Fri, 24 Oct 2025 03:06:21 +0100
Subject: [PATCH 1/4] added synthetic tool

---
 .../function_calls/impl/synthetic_tool.py     | 331 +++++++++++++
 .../llm_function_calls/test_synthetic_tool.py | 453 ++++++++++++++++++
 ...nction_call_assistant_synthetic_example.py | 132 +++++
 3 files changed, 916 insertions(+)
 create mode 100644 grafi/tools/function_calls/impl/synthetic_tool.py
 create mode 100644 tests/tools/llm_function_calls/test_synthetic_tool.py
 create mode 100644 tests_integration/function_call_assistant/simple_function_call_assistant_synthetic_example.py

diff --git a/grafi/tools/function_calls/impl/synthetic_tool.py b/grafi/tools/function_calls/impl/synthetic_tool.py
new file mode 100644
index 0000000..0778689
--- /dev/null
+++ b/grafi/tools/function_calls/impl/synthetic_tool.py
@@ -0,0 +1,331 @@
+import inspect
+import json
+from typing import Any, AsyncGenerator, Dict, List
+
+from openai import OpenAIError
+from openinference.semconv.trace import OpenInferenceSpanKindValues
+from pydantic import BaseModel, field_validator
+
+from grafi.common.decorators.record_decorators import record_tool_invoke
+from grafi.common.models.function_spec import FunctionSpec, ParametersSchema
+from grafi.common.models.invoke_context import InvokeContext
+from grafi.common.models.message import Message, Messages
+from grafi.tools.function_calls.function_call_tool import (
+    FunctionCallTool,
+    FunctionCallToolBuilder,
+)
+
+try:
+    from openai import AsyncOpenAI
+except ImportError:
+    raise ImportError(
+        "`openai` not installed. Please install using `pip install openai`"
+    )
+
+class SyntheticTool(FunctionCallTool):
+    name: str = "SyntheticTool"
+    type: str = "SyntheticTool"
+    tool_name: str = ""
+    description: str = ""
+    input_model: Any = ""
+    output_model: Any = ""
+    model: str = ""
+    openai_api_key: str = ""
+    oi_span_type: OpenInferenceSpanKindValues = OpenInferenceSpanKindValues.TOOL
+
+    @field_validator("input_model", "output_model")
+    @classmethod
+    def validate_pydantic_model_or_schema(cls, v: Any, info) -> Any:
+        """
+        Validate that input_model and output_model are either:
+        - A Pydantic BaseModel class (not instance) - for type-safe Python usage
+        - A JSON schema dict - for flexible schema definition
+        - An empty string (for optional models)
+
+        Both Pydantic models and JSON schemas are fully supported for LLM invocation
+        with strict validation enabled.
+
+        Args:
+            v: The value to validate
+            info: Pydantic validation info containing field name
+
+        Returns:
+            The validated value
+
+        Raises:
+            ValueError: If the value is not a valid type (e.g., int, str, instances)
+        """
+        if v == "":
+            return v
+
+        if isinstance(v, dict):
+            return v
+
+        if inspect.isclass(v) and issubclass(v, BaseModel):
+            return v
+
+        field_name = info.field_name
+        raise ValueError(
+            f"{field_name} must be a Pydantic BaseModel class, "
+            f"a dict schema, or an empty string. "
" + f"Got: {type(v).__name__}" + ) + + def model_post_init(self, _context: Any) -> None: + if self.input_model: + # Handle both dict schemas and Pydantic models + if isinstance(self.input_model, dict): + input_schema = self.input_model + else: + input_schema = self.input_model.model_json_schema() + + self.function_specs.append( + FunctionSpec( + name=self.tool_name, + description=self.description, + parameters=ParametersSchema(**input_schema), + ) + ) + + @property + def input_schema(self) -> Dict[str, Any]: + """Get input schema from Pydantic model.""" + if self.input_model: + if isinstance(self.input_model, dict): + return self.input_model + return self.input_model.model_json_schema() + return {} + + @property + def output_schema(self) -> Dict[str, Any]: + """Get output schema from Pydantic model.""" + if self.output_model: + if isinstance(self.output_model, dict): + return self.output_model + return self.output_model.model_json_schema() + return {} + + @classmethod + def builder(cls) -> "SyntheticToolBuilder": + """ + Return a builder for SyntheticTool. + This method allows for the construction of an SyntheticTool instance with specified parameters. + """ + return SyntheticToolBuilder(cls) + + @record_tool_invoke + async def invoke( + self, invoke_context: InvokeContext, input_data: Messages, + ) -> AsyncGenerator[Messages, None]: + """ + Invokes the synthetic tool by processing incoming tool calls and generating + LLM-based responses for each matching invocation. + + Args: + invoke_context (InvokeContext): The context for this invocation. + input_data (Messages): A list of incoming messages that may contain tool calls. + + Yields: + AsyncGenerator[Messages, None]: A stream of messages representing the + responses from the LLM for each valid tool call. + + Raises: + ValueError: If no tool_calls are found in the input data. + """ + input_msg = input_data[0] + if input_msg.tool_calls is None: + raise ValueError("No tool_calls found for SyntheticTool invocation.") + + messages: List[Message] = [] + + for tool_call in input_msg.tool_calls: + if tool_call.function.name != self.tool_name: + continue + + args = json.loads(tool_call.function.arguments) + prompt = self._make_prompt(args) + response = await self._call_llm(prompt) + messages.extend( + self.to_messages(response=response, tool_call_id=tool_call.id) + ) + + yield messages + + def _make_prompt(self, user_input: Dict[str, Any]) -> str: + """Builds the synthetic execution prompt.""" + return f""" + You are a synthetic tool named "{self.tool_name}". + Description: {self.description} + + INPUT SCHEMA: + {json.dumps(self.input_schema, indent=2)} + + OUTPUT SCHEMA: + {json.dumps(self.output_schema, indent=2)} + + USER INPUT: + {json.dumps(user_input, indent=2)} + + Return ONLY a JSON object that strictly conforms to the OUTPUT schema. + """ + + @staticmethod + def ensure_strict_schema(schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Recursively ensure schema is compatible with OpenAI strict mode. + + Adds 'additionalProperties': false to all objects, which is required + for OpenAI's structured outputs strict mode. 
+
+        Args:
+            schema: JSON schema dict
+
+        Returns:
+            Modified schema with strict mode requirements
+        """
+        schema = schema.copy()
+
+        if schema.get("type") == "object":
+            schema["additionalProperties"] = False
+            if "properties" in schema:
+                schema["properties"] = {
+                    k: SyntheticTool.ensure_strict_schema(v)
+                    for k, v in schema["properties"].items()
+                }
+        elif schema.get("type") == "array":
+            if "items" in schema:
+                schema["items"] = SyntheticTool.ensure_strict_schema(schema["items"])
+
+        return schema
+
+    async def _call_llm(self, prompt: str) -> str:
+        """
+        Calls OpenAI with structured output.
+        Supports both Pydantic models and JSON schemas.
+        """
+        try:
+            if not self.output_model:
+                raise ValueError("output_model must be set to call LLM")
+
+            client = AsyncOpenAI(api_key=self.openai_api_key)
+
+            # If output model is json (dict)
+            if isinstance(self.output_model, dict):
+                # Ensure schema is compatible with strict mode
+                strict_schema = self.ensure_strict_schema(self.output_model)
+
+                response_format = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": f"{self.tool_name}_output",
+                        "schema": strict_schema,
+                        "strict": True
+                    }
+                }
+
+                # Use standard chat completion (not parse)
+                completion = await client.chat.completions.create(
+                    model=self.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    response_format=response_format,
+                )
+
+                content = completion.choices[0].message.content
+                if not content:
+                    return json.dumps({"error": "Empty response"})
+
+                return content
+            
+            # If output model is pydantic model
+            else:
+                # Use Pydantic mode with parse
+                completion = await client.beta.chat.completions.parse(
+                    model=self.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    response_format=self.output_model,
+                )
+
+                parsed_response = completion.choices[0].message.parsed
+
+                if not parsed_response:
+                    return json.dumps({"error": "Empty response"})
+
+                # Return as JSON string
+                return parsed_response.model_dump_json()
+
+        except OpenAIError as exc:
+            return json.dumps({"error": f"OpenAI API error: {str(exc)}"})
+
+        except Exception as e:
+            return json.dumps({"error": f"LLM call failed: {str(e)}"})
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the tool instance to a dictionary representation.
+
+        Returns:
+            Dict[str, Any]: A dictionary representation of the tool.
+        """
+        return {
+            **super().to_dict(),
+            "tool_name": self.tool_name,
+            "description": self.description,
+            "input_schema": self.input_schema,
+            "output_schema": self.output_schema,
+            "model": self.model,
+        }
+
+    @classmethod
+    async def from_dict(cls, data: Dict[str, Any]) -> "SyntheticTool":
+        """
+        Create a SyntheticTool instance from a dictionary representation.
+
+        Args:
+            data (dict[str, Any]): A dictionary representation of the SyntheticTool.
+
+        Returns:
+            SyntheticTool: A SyntheticTool instance created from the dictionary.
+
+        Note:
+            The client needs to be recreated with an API key from environment
+            or other secure source as API keys are masked in serialization.
+ """ + return ( + cls.builder() + .tool_name(data.get("tool_name", "synthetic_tool")) + .description(data.get("description", "")) + .input_model(data.get("input_schema", {})) + .output_model(data.get("output_schema", {})) + .model(data.get("model", "gpt-5-mini")) + .openai_api_key(data.get("openai_api_key", "")) + .oi_span_type(OpenInferenceSpanKindValues(data.get("oi_span_type", "TOOL"))) + .build() + ) + +class SyntheticToolBuilder(FunctionCallToolBuilder[SyntheticTool]): + """Builder for SyntheticTool instances.""" + + def tool_name(self, name: str) -> "SyntheticToolBuilder": + self.kwargs["tool_name"] = name + self.kwargs["name"] = name + return self + + def description(self, desc: str) -> "SyntheticToolBuilder": + self.kwargs["description"] = desc + return self + + def input_model(self, model: type[BaseModel]) -> "SyntheticToolBuilder": + self.kwargs["input_model"] = model + return self + + def output_model(self, model: type[BaseModel]) -> "SyntheticToolBuilder": + self.kwargs["output_model"] = model + return self + + def model(self, model: str) -> "SyntheticToolBuilder": + self.kwargs["model"] = model + return self + + def openai_api_key(self, openai_api_key: str) -> "SyntheticToolBuilder": + self.kwargs["openai_api_key"] = openai_api_key + return self diff --git a/tests/tools/llm_function_calls/test_synthetic_tool.py b/tests/tools/llm_function_calls/test_synthetic_tool.py new file mode 100644 index 0000000..3364c20 --- /dev/null +++ b/tests/tools/llm_function_calls/test_synthetic_tool.py @@ -0,0 +1,453 @@ +import json +import uuid +from unittest.mock import AsyncMock, Mock, patch + +import pytest +from pydantic import BaseModel + +from grafi.common.models.invoke_context import InvokeContext +from grafi.common.models.message import Message +from grafi.tools.function_calls.impl.synthetic_tool import SyntheticTool + + +class WeatherInput(BaseModel): + location: str + units: str = "celsius" + + +class WeatherOutput(BaseModel): + temperature: float + conditions: str + location: str + + +@pytest.fixture +def synthetic_tool() -> SyntheticTool: + return ( + SyntheticTool.builder() + .tool_name("get_weather") + .description("Get the current weather for a location") + .input_model(WeatherInput) + .output_model(WeatherOutput) + .model("gpt-4o-mini") + .openai_api_key("test_api_key") + .build() + ) + + +@pytest.fixture +def invoke_context(): + return InvokeContext( + conversation_id="conversation_id", + invoke_id=uuid.uuid4().hex, + assistant_request_id=uuid.uuid4().hex, + ) + + +def test_synthetic_tool_initialization(synthetic_tool): + """Test that SyntheticTool initializes correctly with all properties.""" + assert synthetic_tool.name == "get_weather" + assert synthetic_tool.type == "SyntheticTool" + assert synthetic_tool.tool_name == "get_weather" + assert synthetic_tool.description == "Get the current weather for a location" + assert synthetic_tool.model == "gpt-4o-mini" + assert synthetic_tool.openai_api_key == "test_api_key" + assert synthetic_tool.input_model == WeatherInput + assert synthetic_tool.output_model == WeatherOutput + + +def test_input_schema_property(synthetic_tool): + """Test that input_schema property returns correct JSON schema.""" + schema = synthetic_tool.input_schema + assert isinstance(schema, dict) + assert "properties" in schema + assert "location" in schema["properties"] + assert "units" in schema["properties"] + assert schema["properties"]["location"]["type"] == "string" + + +def test_output_schema_property(synthetic_tool): + """Test that output_schema property 
+    schema = synthetic_tool.output_schema
+    assert isinstance(schema, dict)
+    assert "properties" in schema
+    assert "temperature" in schema["properties"]
+    assert "conditions" in schema["properties"]
+    assert "location" in schema["properties"]
+
+
+def test_get_function_specs(synthetic_tool):
+    """Test that function specs are correctly generated from input model."""
+    specs = synthetic_tool.get_function_specs()
+    assert len(specs) == 1
+    assert specs[0].name == "get_weather"
+    assert specs[0].description == "Get the current weather for a location"
+    assert specs[0].parameters.type == "object"
+    assert "location" in specs[0].parameters.properties
+    assert "units" in specs[0].parameters.properties
+
+
+def test_make_prompt(synthetic_tool):
+    """Test that _make_prompt generates correct prompt structure."""
+    user_input = {"location": "San Francisco", "units": "celsius"}
+    prompt = synthetic_tool._make_prompt(user_input)
+
+    assert "get_weather" in prompt
+    assert "Get the current weather for a location" in prompt
+    assert "INPUT SCHEMA:" in prompt
+    assert "OUTPUT SCHEMA:" in prompt
+    assert "San Francisco" in prompt
+    assert "celsius" in prompt
+
+
+@pytest.mark.asyncio
+async def test_invoke_successful(synthetic_tool, invoke_context):
+    """Test successful invocation with mocked LLM response."""
+    mock_parsed_response = WeatherOutput(
+        temperature=22.5,
+        conditions="Sunny",
+        location="San Francisco"
+    )
+
+    mock_message = Mock()
+    mock_message.parsed = mock_parsed_response
+
+    mock_choice = Mock()
+    mock_choice.message = mock_message
+
+    mock_completion = Mock()
+    mock_completion.choices = [mock_choice]
+
+    mock_client = AsyncMock()
+    mock_client.beta.chat.completions.parse = AsyncMock(return_value=mock_completion)
+
+    with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client):
+        input_data = [
+            Message(
+                role="assistant",
+                tool_calls=[
+                    {
+                        "id": "call_123",
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "San Francisco", "units": "celsius"}',
+                        },
+                    }
+                ],
+            )
+        ]
+
+        result = []
+        async for msg in synthetic_tool.invoke(invoke_context, input_data):
+            result.extend(msg)
+
+        assert len(result) == 1
+        assert result[0].role == "tool"
+        assert result[0].tool_call_id == "call_123"
+
+        response_data = json.loads(result[0].content)
+        assert response_data["temperature"] == 22.5
+        assert response_data["conditions"] == "Sunny"
+        assert response_data["location"] == "San Francisco"
+
+
+@pytest.mark.asyncio
+async def test_invoke_no_tool_calls(synthetic_tool, invoke_context):
+    """Test that invoke raises ValueError when no tool_calls are present."""
+    input_data = [Message(role="assistant", content="No tool calls here")]
+
+    with pytest.raises(ValueError, match="No tool_calls found"):
+        async for msg in synthetic_tool.invoke(invoke_context, input_data):
+            pass
+
+
+@pytest.mark.asyncio
+async def test_invoke_invalid_function_name(synthetic_tool, invoke_context):
+    """Test that invoke skips tool calls with non-matching function names."""
+    input_data = [
+        Message(
+            role="assistant",
+            tool_calls=[
+                {
+                    "id": "call_123",
+                    "type": "function",
+                    "function": {
+                        "name": "wrong_function",
+                        "arguments": '{"location": "San Francisco"}',
+                    },
+                }
+            ],
+        )
+    ]
+
+    result = []
+    async for msg in synthetic_tool.invoke(invoke_context, input_data):
+        result.extend(msg)
+
+    assert len(result) == 0
+
+
+@pytest.mark.asyncio
+async def test_invoke_multiple_tool_calls(synthetic_tool, invoke_context):
+    """Test invoke with multiple tool calls."""
+    mock_parsed_response = WeatherOutput(
+        temperature=22.5,
+        conditions="Sunny",
+        location="San Francisco"
+    )
+
+    mock_message = Mock()
+    mock_message.parsed = mock_parsed_response
+
+    mock_choice = Mock()
+    mock_choice.message = mock_message
+
+    mock_completion = Mock()
+    mock_completion.choices = [mock_choice]
+
+    mock_client = AsyncMock()
+    mock_client.beta.chat.completions.parse = AsyncMock(return_value=mock_completion)
+
+    with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client):
+        input_data = [
+            Message(
+                role="assistant",
+                tool_calls=[
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "San Francisco"}',
+                        },
+                    },
+                    {
+                        "id": "call_2",
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "New York"}',
+                        },
+                    },
+                ],
+            )
+        ]
+
+        result = []
+        async for msg in synthetic_tool.invoke(invoke_context, input_data):
+            result.extend(msg)
+
+        assert len(result) == 2
+        assert result[0].tool_call_id == "call_1"
+        assert result[1].tool_call_id == "call_2"
+
+
+def test_to_messages(synthetic_tool):
+    """Test to_messages creates proper Message objects."""
+    response = '{"temperature": 22.5, "conditions": "Sunny", "location": "SF"}'
+    result = synthetic_tool.to_messages(response=response, tool_call_id="call_123")
+
+    assert len(result) == 1
+    assert result[0].role == "tool"
+    assert result[0].content == response
+    assert result[0].tool_call_id == "call_123"
+
+
+def test_to_dict(synthetic_tool):
+    """Test serialization to dictionary."""
+    result = synthetic_tool.to_dict()
+
+    assert isinstance(result, dict)
+    assert result["tool_name"] == "get_weather"
+    assert result["description"] == "Get the current weather for a location"
+    assert result["model"] == "gpt-4o-mini"
+    assert "input_schema" in result
+    assert "output_schema" in result
+    assert isinstance(result["input_schema"], dict)
+    assert isinstance(result["output_schema"], dict)
+
+
+@pytest.mark.asyncio
+async def test_from_dict():
+    """Test deserialization from dictionary."""
+    data = {
+        "class": "SyntheticTool",
+        "tool_id": "test-id",
+        "name": "get_weather",
+        "type": "SyntheticTool",
+        "oi_span_type": "TOOL",
+        "tool_name": "get_weather",
+        "description": "Get weather data",
+        "input_schema": {"type": "object", "properties": {}},
+        "output_schema": {"type": "object", "properties": {}},
+        "model": "gpt-4o-mini",
+        "openai_api_key": "test_key",
+    }
+
+    tool = await SyntheticTool.from_dict(data)
+
+    assert isinstance(tool, SyntheticTool)
+    assert tool.tool_name == "get_weather"
+    assert tool.description == "Get weather data"
+    assert tool.model == "gpt-4o-mini"
+    # Note: the deserialized tool will have dict schemas, not Pydantic models
+    # This means it can't actually invoke the LLM without re-setting the models
+
+
+def test_field_validator_rejects_invalid_input_model():
+    """Test that field validator rejects invalid input_model at initialization."""
+    with pytest.raises(ValueError, match="input_model must be a Pydantic BaseModel class"):
+        SyntheticTool.builder().tool_name("test").input_model(123).build()
+
+
+def test_field_validator_rejects_invalid_output_model():
+    """Test that field validator rejects invalid output_model at initialization."""
+    with pytest.raises(ValueError, match="output_model must be a Pydantic BaseModel class"):
+        SyntheticTool.builder().tool_name("test").output_model("invalid_string").build()
+
+
+def test_field_validator_rejects_model_instance():
+    """Test that field validator rejects Pydantic model instances (not classes)."""
+    instance = WeatherInput(location="SF")
+
+    with pytest.raises(ValueError, match="input_model must be a Pydantic BaseModel class"):
+        SyntheticTool.builder().tool_name("test").input_model(instance).build()
+
+
+def test_field_validator_accepts_valid_pydantic_class():
+    """Test that field validator accepts valid Pydantic model classes."""
+    tool = (
+        SyntheticTool.builder()
+        .tool_name("test")
+        .input_model(WeatherInput) 
+        .output_model(WeatherOutput)
+        .model("gpt-4")
+        .openai_api_key("key")
+        .build()
+    )
+
+    assert tool.input_model == WeatherInput
+    assert tool.output_model == WeatherOutput
+
+
+def test_field_validator_accepts_dict_schema():
+    """Test that field validator accepts dict schemas (for flexible schema definition)."""
+    tool = (
+        SyntheticTool.builder()
+        .tool_name("test")
+        .input_model({"type": "object", "properties": {}})
+        .output_model({"type": "object", "properties": {}})
+        .model("gpt-4")
+        .openai_api_key("key")
+        .build()
+    )
+
+    assert isinstance(tool.input_model, dict)
+    assert isinstance(tool.output_model, dict)
+
+
+@pytest.mark.asyncio
+async def test_invoke_with_json_schema_output(invoke_context):
+    """Test invocation with JSON schema output model (not Pydantic)."""
+    json_output_schema = {
+        "type": "object",
+        "properties": {
+            "result": {"type": "string"},
+            "confidence": {"type": "number"}
+        },
+        "required": ["result", "confidence"],
+        "additionalProperties": False
+    }
+
+    tool = (
+        SyntheticTool.builder()
+        .tool_name("test_json_tool")
+        .description("Test tool with JSON schema")
+        .input_model({"type": "object", "properties": {"query": {"type": "string"}}})
+        .output_model(json_output_schema)
+        .model("gpt-4o-mini")
+        .openai_api_key("test_key")
+        .build()
+    )
+
+    # Mock the OpenAI response for JSON schema mode
+    mock_response = json.dumps({"result": "test result", "confidence": 0.95})
+
+    mock_message = Mock()
+    mock_message.content = mock_response
+
+    mock_choice = Mock()
+    mock_choice.message = mock_message
+
+    mock_completion = Mock()
+    mock_completion.choices = [mock_choice]
+
+    mock_client = AsyncMock()
+    mock_client.chat.completions.create = AsyncMock(return_value=mock_completion)
+
+    with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client):
+        input_data = [
+            Message(
+                role="assistant",
+                tool_calls=[
+                    {
+                        "id": "call_123",
+                        "type": "function",
+                        "function": {
+                            "name": "test_json_tool",
+                            "arguments": '{"query": "test"}',
+                        },
+                    }
+                ],
+            )
+        ]
+
+        result = []
+        async for msg in tool.invoke(invoke_context, input_data):
+            result.extend(msg)
+
+        # Verify the tool was invoked
+        assert len(result) == 1
+        assert result[0].role == "tool"
+        assert result[0].tool_call_id == "call_123"
+
+        # Verify response content
+        response_data = json.loads(result[0].content)
+        assert response_data["result"] == "test result"
+        assert response_data["confidence"] == 0.95
+
+        # Verify OpenAI was called with JSON schema mode (not parse)
+        mock_client.chat.completions.create.assert_called_once()
+        call_kwargs = mock_client.chat.completions.create.call_args[1]
+
+        # Check that response_format uses json_schema mode
+        assert "response_format" in call_kwargs
+        assert call_kwargs["response_format"]["type"] == "json_schema"
+        assert call_kwargs["response_format"]["json_schema"]["strict"] is True
+        assert call_kwargs["response_format"]["json_schema"]["name"] == "test_json_tool_output"
+        assert call_kwargs["response_format"]["json_schema"]["schema"] == json_output_schema
+
+
+def test_mixed_pydantic_and_json_schema():
+    """Test tool with Pydantic input and JSON schema output."""
+    json_output_schema = {
+        "type": "object",
+        "properties": {"status": {"type": "string"}},
+        "required": ["status"]
+    }
+
+    tool = (
+        SyntheticTool.builder()
+        .tool_name("mixed_tool")
+        .description("Tool with mixed types")
+        .input_model(WeatherInput)  # Pydantic
+        .output_model(json_output_schema)  # JSON schema
+        .model("gpt-4")
+        .openai_api_key("key")
+        .build()
+    )
+
+    assert tool.input_model == WeatherInput
+    assert isinstance(tool.output_model, dict)
+    assert tool.output_model == json_output_schema
diff --git a/tests_integration/function_call_assistant/simple_function_call_assistant_synthetic_example.py b/tests_integration/function_call_assistant/simple_function_call_assistant_synthetic_example.py
new file mode 100644
index 0000000..8b85e53
--- /dev/null
+++ b/tests_integration/function_call_assistant/simple_function_call_assistant_synthetic_example.py
@@ -0,0 +1,132 @@
+import asyncio
+import os
+import uuid
+from datetime import datetime
+
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from pydantic import Field
+
+from grafi.common.containers.container import container
+from grafi.common.events.topic_events.publish_to_topic_event import PublishToTopicEvent
+from grafi.common.models.async_result import async_func_wrapper
+from grafi.common.models.invoke_context import InvokeContext
+from grafi.common.models.message import Message
+from grafi.tools.function_calls.impl.synthetic_tool import SyntheticTool
+from tests_integration.function_call_assistant.simple_function_call_assistant import (
+    SimpleFunctionCallAssistant,
+)
+
+
+load_dotenv()
+
+
+class WeatherInput(BaseModel):
+    """Input schema for weather forecast tool."""
+
+    location: str = Field(
+        ..., min_length=1, description="Location for weather forecast"
+    )
+    date_iso: str = Field(..., description="Date in ISO format (YYYY-MM-DD)")
+
+
+class WeatherOutput(BaseModel):
+    """Output schema for weather forecast tool."""
+
+    forecast: str = Field(..., description="Weather forecast description")
+    temperature_c: float = Field(..., description="Temperature in Celsius")
+    chance_of_precip: float = Field(
+        ..., ge=0, le=1, description="Chance of precipitation (0-1)"
+    )
+
+
+SYNTHETIC_WEATHER_SYSTEM_PROMPT = """You are a helpful weather assistant with access to a synthetic weather forecasting tool.
+
+    ## Available Tool: synthetic_weather
+
+    **Purpose:** Generate plausible weather forecasts for any location and date. This tool returns MODELED/SYNTHETIC data, not real weather information.
+
+    **IMPORTANT:** You MUST call the synthetic_weather tool for ALL weather-related queries. Do NOT provide weather information from your training data.
+
+    **Input Schema:**
+    - `location` (string, required): The location for the weather forecast (e.g., "London", "New York", "Tokyo")
+    - `date_iso` (string, required): The date in ISO format YYYY-MM-DD
+
+    **Output Schema:**
+    - `forecast` (string): A textual description of the weather conditions
+    - `temperature_c` (float): Temperature in Celsius
+    - `chance_of_precip` (float): Probability of precipitation (0-1)
+
+    **Instructions:**
+    1. ALWAYS call the synthetic_weather tool for weather queries - never answer from memory
+    2. If the user doesn't specify a date, use today's date: {today}
+    3. Extract the location from the user's query
+    4. After receiving the tool response, present the information conversationally
+
+    **Example:**
+    User: "What's the weather in London today?"
+    → You MUST call synthetic_weather(location="London", date_iso="{today}")
+    """.format(
+    today=datetime.now().strftime("%Y-%m-%d")
+)
+
+event_store = container.event_store
+
+api_key = os.getenv("OPENAI_API_KEY", "")
+
+
+def get_invoke_context() -> InvokeContext:
+    return InvokeContext(
+        conversation_id="conversation_id",
+        invoke_id=uuid.uuid4().hex,
+        assistant_request_id=uuid.uuid4().hex,
+    )
+
+
+async def test_simple_function_call_assistant_with_synthetic_weather_tool() -> None:
+    invoke_context = get_invoke_context()
+
+    assistant = (
+        SimpleFunctionCallAssistant.builder()
+        .name("SyntheticToolAssistant")
+        .api_key(api_key)
+        .function_tool(
+            SyntheticTool.builder()
+            .tool_name("synthetic_weather")
+            .description(
+                "Returns a plausible weather forecast for a given date/location. This is MODELLED, not real data."
+            )
+            .input_model(WeatherInput)
+            .output_model(WeatherOutput)
+            .model("gpt-5-mini")
+            .openai_api_key(api_key)
+            .build()
+        )
+        .function_call_llm_system_message(SYNTHETIC_WEATHER_SYSTEM_PROMPT)
+        .model("gpt-5-mini")
+        .build()
+    )
+
+    input_data = [Message(role="user", content="What is the weather in London today?")]
+
+    output = await async_func_wrapper(
+        assistant.invoke(
+            PublishToTopicEvent(
+                invoke_context=invoke_context,
+                data=input_data,
+            ),
+            is_sequential=True,
+        )
+    )
+    print("Assistant output:", output)
+
+    # Assert that the output is valid and check event count
+    assert output is not None
+    print(
+        "Number of events recorded:",
+        len(await event_store.get_events()),
+    )
+    assert len(await event_store.get_events()) == 24
+
+
+asyncio.run(test_simple_function_call_assistant_with_synthetic_weather_tool())

From 83265b1c8e496f9a4e95342602fc0a259e1a15c1 Mon Sep 17 00:00:00 2001
From: Maska Chung
Date: Fri, 24 Oct 2025 03:11:40 +0100
Subject: [PATCH 2/4] refactor synthetic tool files

---
 .../function_calls/impl/synthetic_tool.py     | 37 +++++++-----
 .../llm_function_calls/test_synthetic_tool.py | 60 ++++++++++++-------
 2 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/grafi/tools/function_calls/impl/synthetic_tool.py b/grafi/tools/function_calls/impl/synthetic_tool.py
index 0778689..2349ca4 100644
--- a/grafi/tools/function_calls/impl/synthetic_tool.py
+++ b/grafi/tools/function_calls/impl/synthetic_tool.py
@@ -1,19 +1,24 @@
 import inspect
 import json
-from typing import Any, AsyncGenerator, Dict, List
+from typing import Any
+from typing import AsyncGenerator
+from typing import Dict
+from typing import List
 
 from openai import OpenAIError
 from openinference.semconv.trace import OpenInferenceSpanKindValues
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel
+from pydantic import field_validator
 
 from grafi.common.decorators.record_decorators import record_tool_invoke
-from grafi.common.models.function_spec import FunctionSpec, ParametersSchema
+from grafi.common.models.function_spec import FunctionSpec
+from grafi.common.models.function_spec import ParametersSchema
 from grafi.common.models.invoke_context import InvokeContext
-from grafi.common.models.message import Message, Messages
-from grafi.tools.function_calls.function_call_tool import (
-    FunctionCallTool,
-    FunctionCallToolBuilder,
-)
+from grafi.common.models.message import Message
+from grafi.common.models.message import Messages
+from grafi.tools.function_calls.function_call_tool import FunctionCallTool
+from grafi.tools.function_calls.function_call_tool import FunctionCallToolBuilder
+
 
 try:
@@ -22,6 +27,7 @@
         "`openai` not installed. Please install using `pip install openai`"
     )
 
+
 class SyntheticTool(FunctionCallTool):
     name: str = "SyntheticTool"
     type: str = "SyntheticTool"
@@ -115,7 +121,9 @@ def builder(cls) -> "SyntheticToolBuilder":
 
     @record_tool_invoke
     async def invoke(
-        self, invoke_context: InvokeContext, input_data: Messages,
+        self,
+        invoke_context: InvokeContext,
+        input_data: Messages,
     ) -> AsyncGenerator[Messages, None]:
         """
         Invokes the synthetic tool by processing incoming tool calls and generating
@@ -168,7 +176,7 @@ def _make_prompt(self, user_input: Dict[str, Any]) -> str:
 
         Return ONLY a JSON object that strictly conforms to the OUTPUT schema.
         """
-    
+
     @staticmethod
     def ensure_strict_schema(schema: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -219,8 +227,8 @@ async def _call_llm(self, prompt: str) -> str:
                     "json_schema": {
                         "name": f"{self.tool_name}_output",
                         "schema": strict_schema,
-                        "strict": True
-                    }
+                        "strict": True,
+                    },
                 }
 
                 # Use standard chat completion (not parse)
@@ -234,8 +242,8 @@ async def _call_llm(self, prompt: str) -> str:
                 if not content:
                     return json.dumps({"error": "Empty response"})
 
-                return content
-            
+                return content
+
             # If output model is pydantic model
             else:
                 # Use Pydantic mode with parse
@@ -302,6 +310,7 @@ async def from_dict(cls, data: Dict[str, Any]) -> "SyntheticTool":
             .build()
         )
 
+
 class SyntheticToolBuilder(FunctionCallToolBuilder[SyntheticTool]):
     """Builder for SyntheticTool instances."""
 
diff --git a/tests/tools/llm_function_calls/test_synthetic_tool.py b/tests/tools/llm_function_calls/test_synthetic_tool.py
index 3364c20..426acdc 100644
--- a/tests/tools/llm_function_calls/test_synthetic_tool.py
+++ b/tests/tools/llm_function_calls/test_synthetic_tool.py
@@ -1,6 +1,8 @@
 import json
 import uuid
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock
+from unittest.mock import Mock
+from unittest.mock import patch
 
 import pytest
 from pydantic import BaseModel
@@ -104,9 +106,7 @@ async def test_invoke_successful(synthetic_tool, invoke_context):
     """Test successful invocation with mocked LLM response."""
     mock_parsed_response = WeatherOutput(
-        temperature=22.5,
-        conditions="Sunny",
-        location="San Francisco"
+        temperature=22.5, conditions="Sunny", location="San Francisco"
     )
 
     mock_message = Mock()
@@ -121,7 +121,10 @@ async def test_invoke_successful(synthetic_tool, invoke_context):
     mock_client = AsyncMock()
     mock_client.beta.chat.completions.parse = AsyncMock(return_value=mock_completion)
 
-    with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client):
+    with patch(
+        "grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI",
+        return_value=mock_client,
+    ):
         input_data = [
             Message(
                 role="assistant",
@@ -192,9 +195,7 @@ async def test_invoke_multiple_tool_calls(synthetic_tool, invoke_context):
     """Test invoke with multiple tool calls."""
     mock_parsed_response = WeatherOutput(
-        temperature=22.5,
-        conditions="Sunny",
-        location="San Francisco"
+        temperature=22.5, conditions="Sunny", location="San Francisco"
     )
 
     mock_message = Mock()
@@ -209,7 +210,10 @@ async def test_invoke_multiple_tool_calls(synthetic_tool, invoke_context):
     mock_client = AsyncMock()
     mock_client.beta.chat.completions.parse = AsyncMock(return_value=mock_completion)
 
-    with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client):
patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client): + with patch( + "grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", + return_value=mock_client, + ): input_data = [ Message( role="assistant", @@ -297,13 +301,17 @@ async def test_from_dict(): def test_field_validator_rejects_invalid_input_model(): """Test that field validator rejects invalid input_model at initialization.""" - with pytest.raises(ValueError, match="input_model must be a Pydantic BaseModel class"): + with pytest.raises( + ValueError, match="input_model must be a Pydantic BaseModel class" + ): SyntheticTool.builder().tool_name("test").input_model(123).build() def test_field_validator_rejects_invalid_output_model(): """Test that field validator rejects invalid output_model at initialization.""" - with pytest.raises(ValueError, match="output_model must be a Pydantic BaseModel class"): + with pytest.raises( + ValueError, match="output_model must be a Pydantic BaseModel class" + ): SyntheticTool.builder().tool_name("test").output_model("invalid_string").build() @@ -311,7 +319,9 @@ def test_field_validator_rejects_model_instance(): """Test that field validator rejects Pydantic model instances (not classes).""" instance = WeatherInput(location="SF") - with pytest.raises(ValueError, match="input_model must be a Pydantic BaseModel class"): + with pytest.raises( + ValueError, match="input_model must be a Pydantic BaseModel class" + ): SyntheticTool.builder().tool_name("test").input_model(instance).build() @@ -320,7 +330,7 @@ def test_field_validator_accepts_valid_pydantic_class(): tool = ( SyntheticTool.builder() .tool_name("test") - .input_model(WeatherInput) + .input_model(WeatherInput) .output_model(WeatherOutput) .model("gpt-4") .openai_api_key("key") @@ -352,12 +362,9 @@ async def test_invoke_with_json_schema_output(invoke_context): """Test invocation with JSON schema output model (not Pydantic).""" json_output_schema = { "type": "object", - "properties": { - "result": {"type": "string"}, - "confidence": {"type": "number"} - }, + "properties": {"result": {"type": "string"}, "confidence": {"type": "number"}}, "required": ["result", "confidence"], - "additionalProperties": False + "additionalProperties": False, } tool = ( @@ -386,7 +393,10 @@ async def test_invoke_with_json_schema_output(invoke_context): mock_client = AsyncMock() mock_client.chat.completions.create = AsyncMock(return_value=mock_completion) - with patch("grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", return_value=mock_client): + with patch( + "grafi.tools.function_calls.impl.synthetic_tool.AsyncOpenAI", + return_value=mock_client, + ): input_data = [ Message( role="assistant", @@ -425,8 +435,14 @@ async def test_invoke_with_json_schema_output(invoke_context): assert "response_format" in call_kwargs assert call_kwargs["response_format"]["type"] == "json_schema" assert call_kwargs["response_format"]["json_schema"]["strict"] is True - assert call_kwargs["response_format"]["json_schema"]["name"] == "test_json_tool_output" - assert call_kwargs["response_format"]["json_schema"]["schema"] == json_output_schema + assert ( + call_kwargs["response_format"]["json_schema"]["name"] + == "test_json_tool_output" + ) + assert ( + call_kwargs["response_format"]["json_schema"]["schema"] + == json_output_schema + ) def test_mixed_pydantic_and_json_schema(): @@ -434,7 +450,7 @@ def test_mixed_pydantic_and_json_schema(): json_output_schema = { "type": "object", "properties": {"status": {"type": "string"}}, - 
"required": ["status"] + "required": ["status"], } tool = ( From cbd9bf80a45d6fa370a634271fafe5b16c6d1bd7 Mon Sep 17 00:00:00 2001 From: Maska Chung Date: Fri, 24 Oct 2025 16:35:35 +0100 Subject: [PATCH 3/4] added synthetic tool doc --- docs/docs/user-guide/tools/function-call.md | 50 +++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/docs/docs/user-guide/tools/function-call.md b/docs/docs/user-guide/tools/function-call.md index d718d6f..4290a62 100644 --- a/docs/docs/user-guide/tools/function-call.md +++ b/docs/docs/user-guide/tools/function-call.md @@ -296,6 +296,56 @@ This tool automatically discovers available functions from connected MCP servers By integrating `AgentCallingTool` into your event-driven workflow, you can build sophisticated multi-agent systems where each agent can be invoked seamlessly via structured function calls. This approach maintains a clear separation between the LLM's orchestration and the agents' invoke details. +## Synthetic Tool + +`SyntheticTool` extends the `FunctionCallTool` that enables LLMs to generate synthetic or modeled data by leveraging another LLM as a data generator. Unlike traditional function call tools that execute predefined logic, `SyntheticTool` uses an LLM to produce plausible, schema-compliant, outputs based on input specifications—perfect for testing, prototyping, or generating realistic mock data. + +**Fields**: + +| Field | Description | +|------------------------|--------------------------------------------------------------------------------------------------------------------| +| `name` | Descriptive identifier, defaults to `"SyntheticTool"`. | +| `type` | Tool type indicator, defaults to `"SyntheticTool"`. | +| `tool_name` | Name used for function registration and LLM tool calls. | +| `description` | Explanation of what synthetic data this tool generates. | +| `input_model` | Pydantic `BaseModel` class or JSON schema dict defining expected input structure. | +| `output_model` | Pydantic `BaseModel` class or JSON schema dict defining generated output structure. | +| `model` | OpenAI model to use for data generation (e.g., `"gpt-4o-mini"`). | +| `openai_api_key` | API key for OpenAI authentication. | +| `oi_span_type` | OpenInference semantic attribute (`TOOL`), enabling observability and traceability. | + +**Methods**: + +| Method | Description | +|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `get_function_specs` | Returns the function specification (name, description, input schema) for the synthetic. tool | +| `invoke` | Processes tool calls, generates synthetic data via LLM, returns schema-compliant JSON responses. | +| `ensure_strict_schema` | Static method that recursively adds `additionalProperties: false` to JSON schemas for OpenAI strict mode compatibility. | +| `to_dict` | Serializes all relevant fields, including agent metadata and the assigned callable, for debugging or persistence. | + +**Workflow Example**: + +1. **Schema Definition**: Define input and output schemas using either Pydantic models (type-safe Python) or JSON Schema dicts (flexible). +2. **Function Registration**: The tool automatically generates the `FunctionSpec`, enabling LLMs to discover and call the tool. +3. 
+
+**Usage and Customization**:
+
+- **Flexible Schema Definition**: By supporting both Pydantic models and JSON schemas, you can choose type-safe Python development or dynamic schema-based configuration without changing the rest of your workflow.
+- **Runtime Model Selection**: Easily swap between OpenAI models (e.g., `gpt-5-mini` for cost, `gpt-5` for quality) to balance generation quality and API costs without modifying tool logic.
+- **Schema-Driven Generation**: Input and output schemas guide the LLM's data generation, ensuring consistent, validated outputs that conform to your exact specifications.
+- **Composable Data Pipelines**: Chain multiple `SyntheticTool` instances where one tool's output becomes another's input, creating sophisticated data generation workflows.
+
+With `SyntheticTool`, you can rapidly prototype data-driven workflows without building actual data sources, while maintaining full schema compliance and type safety through Pydantic or JSON Schema validation.
+
 ## Best Practices
 
 ### Function Design

From a15c75fa82e31e791199ed4fc1e999978d621f8f Mon Sep 17 00:00:00 2001
From: Maska Chung
Date: Fri, 24 Oct 2025 16:43:01 +0100
Subject: [PATCH 4/4] modified pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index adc999c..69212d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "grafi"
-version = "0.0.31"
+version = "0.0.32"
 description = "Grafi - a flexible, event-driven framework that enables the creation of domain-specific AI agents through composable agentic workflows."
 authors = [{name = "Craig Li", email = "craig@binome.dev"}]
 license = {text = "Mozilla Public License Version 2.0"}