From 760b8ddb8c3677f7a2a731fcf1db42220ba7b987 Mon Sep 17 00:00:00 2001
From: Agam More <agam@apprais.ai>
Date: Mon, 11 Aug 2025 20:02:51 -0500
Subject: [PATCH 1/2] Add save_to_json() convenience method to JobResult

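- Add a simple save_to_json() method to JobResult that handles Citation serialization automatically
- Create parent directories as needed and use the existing to_dict() method internally
- Add Citation.__json__() and a BatchataJSONEncoder helper (batchata/utils/json_encoder.py)
- Update README.md with usage examples
- Add a comprehensive test using the tmp_path fixture
- Bump version to 0.4.5 in pyproject.toml and uv.lock
- Keep save_to_json() itself minimal (~10 lines) for maximum user convenience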

Removes the need for users to manually handle Citation serialization when saving results.
---
 README.md                      | 14 ++++++++
 batchata/core/job_result.py    | 16 ++++++++-
 batchata/types.py              | 13 ++++++++
 batchata/utils/__init__.py     |  3 +-
 batchata/utils/json_encoder.py | 47 ++++++++++++++++++++++++++
 pyproject.toml                 |  2 +-
 tests/core/test_job_result.py  | 61 +++++++++++++++++++++++++++++++++-
 uv.lock                        |  2 +-
 8 files changed, 153 insertions(+), 5 deletions(-)
 create mode 100644 batchata/utils/json_encoder.py

diff --git a/README.md b/README.md
index c45a6ef..e09dab4 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,20 @@ for result in results["cancelled"]:
     print(f"\nJob {result.job_id} was cancelled: {result.error}")
 ```
 
+## Saving Results to JSON
+
+You can easily save individual job results to JSON files:
+
+```python
+# Save individual results to JSON files
+for result in results["completed"]:
+    result.save_to_json(f"output/{result.job_id}.json")
+
+# Or save with custom formatting
+result.save_to_json("my_result.json", indent=4)
+```
+
+This automatically handles Citation serialization and creates any necessary directories.
 
 ## Interactive Progress Display
 
diff --git a/batchata/core/job_result.py b/batchata/core/job_result.py
index 3f57ba1..eaeba3d 100644
--- a/batchata/core/job_result.py
+++ b/batchata/core/job_result.py
@@ -1,6 +1,6 @@
 """JobResult data model."""
 
-from dataclasses import asdict, dataclass
+from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Union
 from pydantic import BaseModel
 
@@ -88,6 +88,20 @@ def to_dict(self) -> Dict[str, Any]:
             "batch_id": self.batch_id
         }
     
+    def save_to_json(self, filepath: str, indent: int = 2) -> None:
+        """Save JobResult to JSON file.
+        
+        Args:
+            filepath: Path to save the JSON file
+            indent: JSON indentation (default: 2)
+        """
+        import json
+        from pathlib import Path
+        
+        Path(filepath).parent.mkdir(parents=True, exist_ok=True)
+        with open(filepath, 'w') as f:
+            json.dump(self.to_dict(), f, indent=indent)
+    
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> 'JobResult':
         """Deserialize from state."""
diff --git a/batchata/types.py b/batchata/types.py
index 909624e..5c8e773 100644
--- a/batchata/types.py
+++ b/batchata/types.py
@@ -12,6 +12,19 @@ class Citation:
     source: str  # Source identifier (e.g., page number, section)
     page: Optional[int] = None  # Page number if applicable
     metadata: Optional[Dict[str, Any]] = None  # Additional metadata
+    
+    def __json__(self):
+        """Make Citation JSON serializable.
+        
+        This method is called by json.dumps() when using the default encoder.
+        Returns a dictionary representation that can be serialized to JSON.
+        """
+        return {
+            'text': self.text,
+            'source': self.source,
+            'page': self.page,
+            'metadata': self.metadata
+        }
 
 
 @dataclass
diff --git a/batchata/utils/__init__.py b/batchata/utils/__init__.py
index 81c926d..974c2dd 100644
--- a/batchata/utils/__init__.py
+++ b/batchata/utils/__init__.py
@@ -6,5 +6,6 @@
 from .logging import get_logger, set_log_level
 from .pdf import create_pdf
 from .rich_progress import RichBatchProgressDisplay
+from .json_encoder import BatchataJSONEncoder
 
-__all__ = ["CostTracker", "to_dict", "StateManager", "get_logger", "set_log_level", "create_pdf", "RichBatchProgressDisplay"]
\ No newline at end of file
+__all__ = ["CostTracker", "to_dict", "StateManager", "get_logger", "set_log_level", "create_pdf", "RichBatchProgressDisplay", "BatchataJSONEncoder"]
\ No newline at end of file
diff --git a/batchata/utils/json_encoder.py b/batchata/utils/json_encoder.py
new file mode 100644
index 0000000..8afcb4c
--- /dev/null
+++ b/batchata/utils/json_encoder.py
@@ -0,0 +1,47 @@
+"""Custom JSON encoder for batchata objects."""
+
+import json
+from dataclasses import asdict, is_dataclass
+from typing import Any
+from pydantic import BaseModel
+
+from ..types import Citation
+
+
+class BatchataJSONEncoder(json.JSONEncoder):
+    """Custom JSON encoder that handles batchata objects like Citation.
+    
+    This encoder automatically converts Citation objects and other dataclasses
+    to dictionaries for JSON serialization.
+    
+    Usage:
+        ```python
+        import json
+        from batchata.utils import BatchataJSONEncoder
+        
+        # Now JobResult objects with Citation objects can be serialized directly
+        json.dump(job_result, f, cls=BatchataJSONEncoder, indent=2)
+        ```
+    """
+    
+    def default(self, obj: Any) -> Any:
+        """Convert objects to JSON-serializable format."""
+        # Handle Citation objects specifically
+        if isinstance(obj, Citation):
+            return {
+                'text': obj.text,
+                'source': obj.source,
+                'page': obj.page,
+                'metadata': obj.metadata
+            }
+        
+        # Handle other dataclasses
+        if is_dataclass(obj):
+            return asdict(obj)
+        
+        # Handle Pydantic models
+        if isinstance(obj, BaseModel):
+            return obj.model_dump()
+        
+        # Let the base class handle other types
+        return super().default(obj)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 9384c1a..ef7ffbc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "batchata"
-version = "0.4.4"
+version = "0.4.5"
 description = "Unified Python API for AI batch requests with 50% cost savings on OpenAI and Anthropic"
 readme = "README.md"
 requires-python = ">=3.12"
diff --git a/tests/core/test_job_result.py b/tests/core/test_job_result.py
index 77509fa..e7eb687 100644
--- a/tests/core/test_job_result.py
+++ b/tests/core/test_job_result.py
@@ -316,4 +316,63 @@ def test_citation_mappings_json_serialization(self):
         assert len(restored.citations) == 2
         assert len(restored.citation_mappings) == 3
         assert len(restored.citation_mappings['cap_rate']) == 2
-        assert len(restored.citation_mappings['occupancy']) == 1
\ No newline at end of file
+        assert len(restored.citation_mappings['occupancy']) == 1
+    
+    def test_save_to_json(self, tmp_path):
+        """Test that save_to_json() correctly saves JobResult to a JSON file."""
+        # Create a JobResult with citations and citation_mappings
+        citations = [
+            Citation(
+                text='Test citation text',
+                source='test.pdf',
+                page=1,
+                metadata={'type': 'page_location', 'document_index': 0}
+            )
+        ]
+        
+        citation_mappings = {
+            'test_field': citations
+        }
+        
+        result = JobResult(
+            job_id="test-save-json",
+            raw_response="Test response",
+            parsed_response={'test_field': 'test_value'},
+            citations=citations,
+            citation_mappings=citation_mappings,
+            input_tokens=100,
+            output_tokens=50,
+            cost_usd=0.05
+        )
+        
+        # Save to JSON file
+        json_file = tmp_path / "subdir" / "test_result.json"
+        result.save_to_json(str(json_file))
+        
+        # Verify file was created
+        assert json_file.exists()
+        
+        # Verify content is correct by loading and comparing
+        import json
+        with open(json_file, 'r') as f:
+            saved_data = json.load(f)
+        
+        # Should match the result of to_dict()
+        expected_data = result.to_dict()
+        assert saved_data == expected_data
+        
+        # Verify specific fields
+        assert saved_data['job_id'] == 'test-save-json'
+        assert saved_data['input_tokens'] == 100
+        assert saved_data['output_tokens'] == 50
+        assert saved_data['cost_usd'] == 0.05
+        
+        # Verify citations are properly serialized (not Citation objects)
+        assert isinstance(saved_data['citations'][0], dict)
+        assert saved_data['citations'][0]['text'] == 'Test citation text'
+        assert saved_data['citations'][0]['source'] == 'test.pdf'
+        assert saved_data['citations'][0]['page'] == 1
+        
+        # Verify citation_mappings are properly serialized
+        assert isinstance(saved_data['citation_mappings']['test_field'][0], dict)
+        assert saved_data['citation_mappings']['test_field'][0]['text'] == 'Test citation text'
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
index 4165959..65688ec 100644
--- a/uv.lock
+++ b/uv.lock
@@ -130,7 +130,7 @@ wheels = [
 
 [[package]]
 name = "batchata"
-version = "0.4.3"
+version = "0.4.5"
 source = { editable = "." }
 dependencies = [
     { name = "anthropic" },

From d685fb2c48a82c498f57edca104f9e0bdf2af3fa Mon Sep 17 00:00:00 2001
From: Agam More <agam@apprais.ai>
Date: Mon, 11 Aug 2025 20:08:28 -0500
Subject: [PATCH 2/2] Clean up implementation and revert version change

- Remove unused __json__() method from Citation class
- Remove unused BatchataJSONEncoder and json_encoder.py file
- Clean up utils/__init__.py imports
- Revert version in pyproject.toml back to 0.4.4 (no version bump yet); uv.lock is not touched by this patch and still records 0.4.5
- Keep minimal, clean save_to_json() implementation
---
 README.md                      | 18 +++----------
 batchata/types.py              | 13 ----------
 batchata/utils/__init__.py     |  3 +--
 batchata/utils/json_encoder.py | 47 ----------------------------------
 pyproject.toml                 |  2 +-
 5 files changed, 5 insertions(+), 78 deletions(-)
 delete mode 100644 batchata/utils/json_encoder.py

diff --git a/README.md b/README.md
index e09dab4..3c9d80a 100644
--- a/README.md
+++ b/README.md
@@ -112,6 +112,9 @@ for result in results["completed"]:
     print(f"  Vendor: {analysis.vendor} (page: {citations.get("vendor").page})")
     print(f"  Total: ${analysis.total_amount:.2f} (page: {citations.get("total_amount").page})")
     print(f"  Status: {analysis.payment_status} (page: {citations.get("payment_status").page})")
+    
+    # Save each result to JSON file
+    result.save_to_json(f"./invoice_results/{result.job_id}.json")
 
 # Process failed/cancelled results  
 for result in results["failed"]:
@@ -121,21 +124,6 @@ for result in results["cancelled"]:
     print(f"\nJob {result.job_id} was cancelled: {result.error}")
 ```
 
-## Saving Results to JSON
-
-You can easily save individual job results to JSON files:
-
-```python
-# Save individual results to JSON files
-for result in results["completed"]:
-    result.save_to_json(f"output/{result.job_id}.json")
-
-# Or save with custom formatting
-result.save_to_json("my_result.json", indent=4)
-```
-
-This automatically handles Citation serialization and creates any necessary directories.
-
 ## Interactive Progress Display
 
 Batchata provides an interactive real-time progress display when using `print_status=True`:
diff --git a/batchata/types.py b/batchata/types.py
index 5c8e773..909624e 100644
--- a/batchata/types.py
+++ b/batchata/types.py
@@ -12,19 +12,6 @@ class Citation:
     source: str  # Source identifier (e.g., page number, section)
     page: Optional[int] = None  # Page number if applicable
     metadata: Optional[Dict[str, Any]] = None  # Additional metadata
-    
-    def __json__(self):
-        """Make Citation JSON serializable.
-        
-        This method is called by json.dumps() when using the default encoder.
-        Returns a dictionary representation that can be serialized to JSON.
-        """
-        return {
-            'text': self.text,
-            'source': self.source,
-            'page': self.page,
-            'metadata': self.metadata
-        }
 
 
 @dataclass
diff --git a/batchata/utils/__init__.py b/batchata/utils/__init__.py
index 974c2dd..81c926d 100644
--- a/batchata/utils/__init__.py
+++ b/batchata/utils/__init__.py
@@ -6,6 +6,5 @@
 from .logging import get_logger, set_log_level
 from .pdf import create_pdf
 from .rich_progress import RichBatchProgressDisplay
-from .json_encoder import BatchataJSONEncoder
 
-__all__ = ["CostTracker", "to_dict", "StateManager", "get_logger", "set_log_level", "create_pdf", "RichBatchProgressDisplay", "BatchataJSONEncoder"]
\ No newline at end of file
+__all__ = ["CostTracker", "to_dict", "StateManager", "get_logger", "set_log_level", "create_pdf", "RichBatchProgressDisplay"]
\ No newline at end of file
diff --git a/batchata/utils/json_encoder.py b/batchata/utils/json_encoder.py
deleted file mode 100644
index 8afcb4c..0000000
--- a/batchata/utils/json_encoder.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""Custom JSON encoder for batchata objects."""
-
-import json
-from dataclasses import asdict, is_dataclass
-from typing import Any
-from pydantic import BaseModel
-
-from ..types import Citation
-
-
-class BatchataJSONEncoder(json.JSONEncoder):
-    """Custom JSON encoder that handles batchata objects like Citation.
-    
-    This encoder automatically converts Citation objects and other dataclasses
-    to dictionaries for JSON serialization.
-    
-    Usage:
-        ```python
-        import json
-        from batchata.utils import BatchataJSONEncoder
-        
-        # Now JobResult objects with Citation objects can be serialized directly
-        json.dump(job_result, f, cls=BatchataJSONEncoder, indent=2)
-        ```
-    """
-    
-    def default(self, obj: Any) -> Any:
-        """Convert objects to JSON-serializable format."""
-        # Handle Citation objects specifically
-        if isinstance(obj, Citation):
-            return {
-                'text': obj.text,
-                'source': obj.source,
-                'page': obj.page,
-                'metadata': obj.metadata
-            }
-        
-        # Handle other dataclasses
-        if is_dataclass(obj):
-            return asdict(obj)
-        
-        # Handle Pydantic models
-        if isinstance(obj, BaseModel):
-            return obj.model_dump()
-        
-        # Let the base class handle other types
-        return super().default(obj)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index ef7ffbc..9384c1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "batchata"
-version = "0.4.5"
+version = "0.4.4"
 description = "Unified Python API for AI batch requests with 50% cost savings on OpenAI and Anthropic"
 readme = "README.md"
 requires-python = ">=3.12"