recursiveAF · recursiveAF · Jan 16, 2025
diff --git a/src/crewai/agents/agent_builder/utilities/base_output_converter.py b/src/crewai/agents/agent_builder/utilities/base_output_converter.py
@@ -25,7 +25,7 @@ class OutputConverter(BaseModel, ABC):
     llm: Any = Field(description="The language model to be used to convert the text.")
     model: Any = Field(description="The model to be used to convert the text.")
     instructions: str = Field(description="Conversion instructions to the LLM.")
-    max_attempts: Optional[int] = Field(
+    max_attempts: int = Field(
         description="Max number of attempts to try to get the output formatted.",
         default=3,
     )

diff --git a/src/crewai/utilities/converter.py b/src/crewai/utilities/converter.py
@@ -26,17 +26,24 @@ def to_pydantic(self, current_attempt=1):
             if self.llm.supports_function_calling():
                 return self._create_instructor().to_pydantic()
             else:
-                return self.llm.call(
+                response = self.llm.call(
                     [
                         {"role": "system", "content": self.instructions},
                         {"role": "user", "content": self.text},
                     ]
                 )
+                return self.model.model_validate_json(response)
+        except ValidationError as e:
+            if current_attempt < self.max_attempts:
+                return self.to_pydantic(current_attempt + 1)
+            raise ConverterError(
+                f"Failed to convert text into a Pydantic model due to the following validation error: {e}"
+            )
         except Exception as e:
             if current_attempt < self.max_attempts:
                 return self.to_pydantic(current_attempt + 1)
-            return ConverterError(
-                f"Failed to convert text into a pydantic model due to the following error: {e}"
+            raise ConverterError(
+                f"Failed to convert text into a Pydantic model due to the following error: {e}"
             )
 
     def to_json(self, current_attempt=1):
@@ -66,7 +73,6 @@ def _create_instructor(self):
             llm=self.llm,
             model=self.model,
             content=self.text,
-            instructions=self.instructions,
         )
         return inst
 
@@ -187,10 +193,15 @@ def convert_with_instructions(
 
 
 def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
-    instructions = "I'm gonna convert this raw text into valid JSON."
+    instructions = "Please convert the following text into valid JSON."
     if llm.supports_function_calling():
         model_schema = PydanticSchemaParser(model=model).get_schema()
-        instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
+        instructions += (
+            f"\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
+        )
+    else:
+        model_description = generate_model_description(model)
+        instructions += f"\n\nThe JSON should follow this format:\n{model_description}"
     return instructions
 
 

diff --git a/src/crewai/utilities/internal_instructor.py b/src/crewai/utilities/internal_instructor.py
@@ -11,12 +11,10 @@ def __init__(
         model: Type,
         agent: Optional[Any] = None,
         llm: Optional[str] = None,
-        instructions: Optional[str] = None,
     ):
         self.content = content
         self.agent = agent
         self.llm = llm
-        self.instructions = instructions
         self.model = model
         self._client = None
         self.set_instructor()
@@ -31,19 +29,14 @@ def set_instructor(self):
             import instructor
             from litellm import completion
 
-            self._client = instructor.from_litellm(
-                completion,
-                mode=instructor.Mode.TOOLS,
-            )
+            self._client = instructor.from_litellm(completion)
 
     def to_json(self):
         model = self.to_pydantic()
         return model.model_dump_json(indent=2)
 
     def to_pydantic(self):
         messages = [{"role": "user", "content": self.content}]
-        if self.instructions:
-            messages.append({"role": "system", "content": self.instructions})
         model = self._client.chat.completions.create(
             model=self.llm.model, response_model=self.model, messages=messages
         )

diff --git a/src/crewai/utilities/pydantic_schema_parser.py b/src/crewai/utilities/pydantic_schema_parser.py
@@ -1,4 +1,4 @@
-from typing import Type, Union, get_args, get_origin
+from typing import Dict, List, Type, Union, get_args, get_origin
 
 from pydantic import BaseModel
 
@@ -10,40 +10,83 @@ def get_schema(self) -> str:
         """
         Public method to get the schema of a Pydantic model.
 
-        :param model: The Pydantic model class to generate schema for.
         :return: String representation of the model schema.
         """
-        return self._get_model_schema(self.model)
-
-    def _get_model_schema(self, model, depth=0) -> str:
-        indent = "    " * depth
-        lines = [f"{indent}{{"]
-        for field_name, field in model.model_fields.items():
-            field_type_str = self._get_field_type(field, depth + 1)
-            lines.append(f"{indent}    {field_name}: {field_type_str},")
-        lines[-1] = lines[-1].rstrip(",")  # Remove trailing comma from last item
-        lines.append(f"{indent}}}")
-        return "\n".join(lines)
-
-    def _get_field_type(self, field, depth) -> str:
+        return "{\n" + self._get_model_schema(self.model) + "\n}"
+
+    def _get_model_schema(self, model: Type[BaseModel], depth: int = 0) -> str:
+        indent = " " * 4 * depth
+        lines = [
+            f"{indent}    {field_name}: {self._get_field_type(field, depth + 1)}"
+            for field_name, field in model.model_fields.items()
+        ]
+        return ",\n".join(lines)
+
+    def _get_field_type(self, field, depth: int) -> str:
         field_type = field.annotation
-        if get_origin(field_type) is list:
+        origin = get_origin(field_type)
+
+        if origin in {list, List}:
             list_item_type = get_args(field_type)[0]
-            if isinstance(list_item_type, type) and issubclass(
-                list_item_type, BaseModel
-            ):
-                nested_schema = self._get_model_schema(list_item_type, depth + 1)
-                return f"List[\n{nested_schema}\n{' ' * 4 * depth}]"
-            else:
-                return f"List[{list_item_type.__name__}]"
-        elif get_origin(field_type) is Union:
-            union_args = get_args(field_type)
-            if type(None) in union_args:
-                non_none_type = next(arg for arg in union_args if arg is not type(None))
-                return f"Optional[{self._get_field_type(field.__class__(annotation=non_none_type), depth)}]"
+            return self._format_list_type(list_item_type, depth)
+
+        if origin in {dict, Dict}:
+            key_type, value_type = get_args(field_type)
+            return f"Dict[{key_type.__name__}, {value_type.__name__}]"
+
+        if origin is Union:
+            return self._format_union_type(field_type, depth)
+
+        if isinstance(field_type, type) and issubclass(field_type, BaseModel):
+            nested_schema = self._get_model_schema(field_type, depth)
+            nested_indent = " " * 4 * depth
+            return f"{field_type.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
+
+        return field_type.__name__
+
+    def _format_list_type(self, list_item_type, depth: int) -> str:
+        if isinstance(list_item_type, type) and issubclass(list_item_type, BaseModel):
+            nested_schema = self._get_model_schema(list_item_type, depth + 1)
+            nested_indent = " " * 4 * (depth)
+            return f"List[\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}\n{nested_indent}]"
+        return f"List[{list_item_type.__name__}]"
+
+    def _format_union_type(self, field_type, depth: int) -> str:
+        args = get_args(field_type)
+        if type(None) in args:
+            # It's an Optional type
+            non_none_args = [arg for arg in args if arg is not type(None)]
+            if len(non_none_args) == 1:
+                inner_type = self._get_field_type_for_annotation(
+                    non_none_args[0], depth
+                )
+                return f"Optional[{inner_type}]"
             else:
-                return f"Union[{', '.join(arg.__name__ for arg in union_args)}]"
-        elif isinstance(field_type, type) and issubclass(field_type, BaseModel):
-            return self._get_model_schema(field_type, depth)
+                # Union with None and multiple other types
+                inner_types = ", ".join(
+                    self._get_field_type_for_annotation(arg, depth)
+                    for arg in non_none_args
+                )
+                return f"Optional[Union[{inner_types}]]"
         else:
-            return getattr(field_type, "__name__", str(field_type))
+            # General Union type
+            inner_types = ", ".join(
+                self._get_field_type_for_annotation(arg, depth) for arg in args
+            )
+            return f"Union[{inner_types}]"
+
+    def _get_field_type_for_annotation(self, annotation, depth: int) -> str:
+        origin = get_origin(annotation)
+        if origin in {list, List}:
+            list_item_type = get_args(annotation)[0]
+            return self._format_list_type(list_item_type, depth)
+        if origin in {dict, Dict}:
+            key_type, value_type = get_args(annotation)
+            return f"Dict[{key_type.__name__}, {value_type.__name__}]"
+        if origin is Union:
+            return self._format_union_type(annotation, depth)
+        if isinstance(annotation, type) and issubclass(annotation, BaseModel):
+            nested_schema = self._get_model_schema(annotation, depth)
+            nested_indent = " " * 4 * depth
+            return f"{annotation.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
+        return annotation.__name__
diff --git a/tests/utilities/cassettes/test_convert_with_instructions.yaml b/tests/utilities/cassettes/test_convert_with_instructions.yaml
@@ -0,0 +1,114 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": "Name: Alice, Age: 30"}], "model":
+      "gpt-4o-mini", "tool_choice": {"type": "function", "function": {"name": "SimpleModel"}},
+      "tools": [{"type": "function", "function": {"name": "SimpleModel", "description":
+      "Correctly extracted `SimpleModel` with all the required parameters with correct
+      types", "parameters": {"properties": {"name": {"title": "Name", "type": "string"},
+      "age": {"title": "Age", "type": "integer"}}, "required": ["age", "name"], "type":
+      "object"}}}]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '507'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.59.6
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.59.6
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.7
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-Aq4a4xDv8G0i4fbTtPJEI2B8UNBup\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1736974028,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+      \           \"id\": \"call_uO5nec8hTk1fpYINM8TUafhe\",\n            \"type\":
+      \"function\",\n            \"function\": {\n              \"name\": \"SimpleModel\",\n
+      \             \"arguments\": \"{\\\"name\\\":\\\"Alice\\\",\\\"age\\\":30}\"\n
+      \           }\n          }\n        ],\n        \"refusal\": null\n      },\n
+      \     \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n
+      \ \"usage\": {\n    \"prompt_tokens\": 79,\n    \"completion_tokens\": 10,\n
+      \   \"total_tokens\": 89,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_72ed7ab54c\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 9028b81aeb1cb05f-ATL
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 15 Jan 2025 20:47:08 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=PzayZLF04c14veGc.0ocVg3VHBbpzKRW8Hqox8L9U7c-1736974028-1.0.1.1-mZpK8.SH9l7K2z8Tvt6z.dURiVPjFqEz7zYEITfRwdr5z0razsSebZGN9IRPmI5XC_w5rbZW2Kg6hh5cenXinQ;
+        path=/; expires=Wed, 15-Jan-25 21:17:08 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=ciwC3n2Srn20xx4JhEUeN6Ap0tNBaE44S95nIilboQ0-1736974028496-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '439'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999978'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_a468000458b9d2848b7497b2e3d485a3
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1