Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class OutputConverter(BaseModel, ABC):
llm: Any = Field(description="The language model to be used to convert the text.")
model: Any = Field(description="The model to be used to convert the text.")
instructions: str = Field(description="Conversion instructions to the LLM.")
max_attempts: Optional[int] = Field(
max_attempts: int = Field(
description="Max number of attempts to try to get the output formatted.",
default=3,
)
Expand Down
23 changes: 17 additions & 6 deletions src/crewai/utilities/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,24 @@ def to_pydantic(self, current_attempt=1):
if self.llm.supports_function_calling():
return self._create_instructor().to_pydantic()
else:
return self.llm.call(
response = self.llm.call(
[
{"role": "system", "content": self.instructions},
{"role": "user", "content": self.text},
]
)
return self.model.model_validate_json(response)
except ValidationError as e:
if current_attempt < self.max_attempts:
return self.to_pydantic(current_attempt + 1)
raise ConverterError(
f"Failed to convert text into a Pydantic model due to the following validation error: {e}"
)
except Exception as e:
if current_attempt < self.max_attempts:
return self.to_pydantic(current_attempt + 1)
return ConverterError(
f"Failed to convert text into a pydantic model due to the following error: {e}"
raise ConverterError(
f"Failed to convert text into a Pydantic model due to the following error: {e}"
)

def to_json(self, current_attempt=1):
Expand Down Expand Up @@ -66,7 +73,6 @@ def _create_instructor(self):
llm=self.llm,
model=self.model,
content=self.text,
instructions=self.instructions,
)
return inst

Expand Down Expand Up @@ -187,10 +193,15 @@ def convert_with_instructions(


def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
    """Build the instruction text asking an LLM to turn raw text into JSON.

    Args:
        model: Pydantic model class describing the target structure.
        llm: Object exposing ``supports_function_calling()``; its answer
            selects how the target structure is described.

    Returns:
        A fixed request sentence followed by either a fenced schema (when the
        LLM supports function calling) or a model description.
    """
    instructions = "Please convert the following text into valid JSON."
    if llm.supports_function_calling():
        # Describe the structure exactly once; appending the schema a second
        # time only lengthens the prompt.
        model_schema = PydanticSchemaParser(model=model).get_schema()
        instructions += (
            f"\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
        )
    else:
        model_description = generate_model_description(model)
        instructions += f"\n\nThe JSON should follow this format:\n{model_description}"
    return instructions


Expand Down
9 changes: 1 addition & 8 deletions src/crewai/utilities/internal_instructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@ def __init__(
model: Type,
agent: Optional[Any] = None,
llm: Optional[str] = None,
instructions: Optional[str] = None,
):
self.content = content
self.agent = agent
self.llm = llm
self.instructions = instructions
self.model = model
self._client = None
self.set_instructor()
Expand All @@ -31,19 +29,14 @@ def set_instructor(self):
import instructor
from litellm import completion

self._client = instructor.from_litellm(
completion,
mode=instructor.Mode.TOOLS,
)
self._client = instructor.from_litellm(completion)

def to_json(self):
    """Return the converted model serialized as JSON with a two-space indent."""
    return self.to_pydantic().model_dump_json(indent=2)

def to_pydantic(self):
messages = [{"role": "user", "content": self.content}]
if self.instructions:
messages.append({"role": "system", "content": self.instructions})
model = self._client.chat.completions.create(
model=self.llm.model, response_model=self.model, messages=messages
)
Expand Down
107 changes: 75 additions & 32 deletions src/crewai/utilities/pydantic_schema_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Type, Union, get_args, get_origin
from typing import Dict, List, Type, Union, get_args, get_origin

from pydantic import BaseModel

Expand All @@ -10,40 +10,83 @@ def get_schema(self) -> str:
"""
Public method to get the schema of a Pydantic model.

:param model: The Pydantic model class to generate schema for.
:return: String representation of the model schema.
"""
return self._get_model_schema(self.model)

def _get_model_schema(self, model, depth=0) -> str:
indent = " " * depth
lines = [f"{indent}{{"]
for field_name, field in model.model_fields.items():
field_type_str = self._get_field_type(field, depth + 1)
lines.append(f"{indent} {field_name}: {field_type_str},")
lines[-1] = lines[-1].rstrip(",") # Remove trailing comma from last item
lines.append(f"{indent}}}")
return "\n".join(lines)

def _get_field_type(self, field, depth) -> str:
return "{\n" + self._get_model_schema(self.model) + "\n}"

def _get_model_schema(self, model: Type[BaseModel], depth: int = 0) -> str:
indent = " " * 4 * depth
lines = [
f"{indent} {field_name}: {self._get_field_type(field, depth + 1)}"
for field_name, field in model.model_fields.items()
]
return ",\n".join(lines)

def _get_field_type(self, field, depth: int) -> str:
field_type = field.annotation
if get_origin(field_type) is list:
origin = get_origin(field_type)

if origin in {list, List}:
list_item_type = get_args(field_type)[0]
if isinstance(list_item_type, type) and issubclass(
list_item_type, BaseModel
):
nested_schema = self._get_model_schema(list_item_type, depth + 1)
return f"List[\n{nested_schema}\n{' ' * 4 * depth}]"
else:
return f"List[{list_item_type.__name__}]"
elif get_origin(field_type) is Union:
union_args = get_args(field_type)
if type(None) in union_args:
non_none_type = next(arg for arg in union_args if arg is not type(None))
return f"Optional[{self._get_field_type(field.__class__(annotation=non_none_type), depth)}]"
return self._format_list_type(list_item_type, depth)

if origin in {dict, Dict}:
key_type, value_type = get_args(field_type)
return f"Dict[{key_type.__name__}, {value_type.__name__}]"

if origin is Union:
return self._format_union_type(field_type, depth)

if isinstance(field_type, type) and issubclass(field_type, BaseModel):
nested_schema = self._get_model_schema(field_type, depth)
nested_indent = " " * 4 * depth
return f"{field_type.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"

return field_type.__name__

def _format_list_type(self, list_item_type, depth: int) -> str:
if isinstance(list_item_type, type) and issubclass(list_item_type, BaseModel):
nested_schema = self._get_model_schema(list_item_type, depth + 1)
nested_indent = " " * 4 * (depth)
return f"List[\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}\n{nested_indent}]"
return f"List[{list_item_type.__name__}]"

def _format_union_type(self, field_type, depth: int) -> str:
args = get_args(field_type)
if type(None) in args:
# It's an Optional type
non_none_args = [arg for arg in args if arg is not type(None)]
if len(non_none_args) == 1:
inner_type = self._get_field_type_for_annotation(
non_none_args[0], depth
)
return f"Optional[{inner_type}]"
else:
return f"Union[{', '.join(arg.__name__ for arg in union_args)}]"
elif isinstance(field_type, type) and issubclass(field_type, BaseModel):
return self._get_model_schema(field_type, depth)
# Union with None and multiple other types
inner_types = ", ".join(
self._get_field_type_for_annotation(arg, depth)
for arg in non_none_args
)
return f"Optional[Union[{inner_types}]]"
else:
return getattr(field_type, "__name__", str(field_type))
# General Union type
inner_types = ", ".join(
self._get_field_type_for_annotation(arg, depth) for arg in args
)
return f"Union[{inner_types}]"

def _get_field_type_for_annotation(self, annotation, depth: int) -> str:
origin = get_origin(annotation)
if origin in {list, List}:
list_item_type = get_args(annotation)[0]
return self._format_list_type(list_item_type, depth)
if origin in {dict, Dict}:
key_type, value_type = get_args(annotation)
return f"Dict[{key_type.__name__}, {value_type.__name__}]"
if origin is Union:
return self._format_union_type(annotation, depth)
if isinstance(annotation, type) and issubclass(annotation, BaseModel):
nested_schema = self._get_model_schema(annotation, depth)
nested_indent = " " * 4 * depth
return f"{annotation.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
return annotation.__name__
114 changes: 114 additions & 0 deletions tests/utilities/cassettes/test_convert_with_instructions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Name: Alice, Age: 30"}], "model":
"gpt-4o-mini", "tool_choice": {"type": "function", "function": {"name": "SimpleModel"}},
"tools": [{"type": "function", "function": {"name": "SimpleModel", "description":
"Correctly extracted `SimpleModel` with all the required parameters with correct
types", "parameters": {"properties": {"name": {"title": "Name", "type": "string"},
"age": {"title": "Age", "type": "integer"}}, "required": ["age", "name"], "type":
"object"}}}]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '507'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.59.6
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.59.6
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-Aq4a4xDv8G0i4fbTtPJEI2B8UNBup\",\n \"object\":
\"chat.completion\",\n \"created\": 1736974028,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_uO5nec8hTk1fpYINM8TUafhe\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"SimpleModel\",\n
\ \"arguments\": \"{\\\"name\\\":\\\"Alice\\\",\\\"age\\\":30}\"\n
\ }\n }\n ],\n \"refusal\": null\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 79,\n \"completion_tokens\": 10,\n
\ \"total_tokens\": 89,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_72ed7ab54c\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 9028b81aeb1cb05f-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Wed, 15 Jan 2025 20:47:08 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=PzayZLF04c14veGc.0ocVg3VHBbpzKRW8Hqox8L9U7c-1736974028-1.0.1.1-mZpK8.SH9l7K2z8Tvt6z.dURiVPjFqEz7zYEITfRwdr5z0razsSebZGN9IRPmI5XC_w5rbZW2Kg6hh5cenXinQ;
path=/; expires=Wed, 15-Jan-25 21:17:08 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=ciwC3n2Srn20xx4JhEUeN6Ap0tNBaE44S95nIilboQ0-1736974028496-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '439'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999978'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_a468000458b9d2848b7497b2e3d485a3
http_version: HTTP/1.1
status_code: 200
version: 1
Loading