From d496633b3e58df0106c92ff8d15c8dee572233ff Mon Sep 17 00:00:00 2001
From: "Kazmer, Nagy-Betegh" <kazmer.nb@gmail.com>
Date: Wed, 29 Oct 2025 15:31:58 +0000
Subject: [PATCH] Comparable field simplification proposal

---
 docs/Migration_to_Annotated_Pattern.md        | 812 ++++++++++++++++++
 .../structured_model_refactor_example.py      | 450 ++++++++++
 2 files changed, 1262 insertions(+)
 create mode 100644 docs/Migration_to_Annotated_Pattern.md
 create mode 100644 src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py

diff --git a/docs/Migration_to_Annotated_Pattern.md b/docs/Migration_to_Annotated_Pattern.md
new file mode 100644
index 0000000..3fc378d
--- /dev/null
+++ b/docs/Migration_to_Annotated_Pattern.md
@@ -0,0 +1,812 @@
+# Migration Guide: Annotated Pattern for Stickler
+
+## Executive Summary
+
+This document outlines a migration path from the current **function-based `ComparableField`** to a new **class-based `ComparableField` with `Annotated` type hints** pattern. The new pattern offers significant benefits while maintaining backward compatibility during the transition.
+
+**Status**: Proof of Concept Complete (see `src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py`)
+
+---
+
+## Table of Contents
+
+1. [Current vs Proposed Pattern](#current-vs-proposed-pattern)
+2. [Benefits of Migration](#benefits-of-migration)
+3. [Migration Strategy](#migration-strategy)
+4. [Integration with Existing Systems](#integration-with-existing-systems)
+5. [Step-by-Step Migration Plan](#step-by-step-migration-plan)
+6. [Breaking Changes & Compatibility](#breaking-changes--compatibility)
+7. [Timeline & Effort Estimation](#timeline--effort-estimation)
+8. [Risk Assessment](#risk-assessment)
+
+---
+
+## Current vs Proposed Pattern
+
+### Current Pattern (Function-Based)
+
+```python
+class Invoice(StructuredModel):
+    """Current approach using function-based ComparableField."""
+    
+    invoice_number: str = ComparableField(
+        comparator=ExactComparator(),
+        threshold=0.9,
+        weight=2.0
+    )
+    
+    vendor: str = ComparableField(
+        comparator=LevenshteinComparator(),
+        threshold=0.7,
+        weight=1.0
+    )
+```
+
+**How it works:**
+- `ComparableField()` is a **function** that returns a `pydantic.Field`
+- Comparison metadata is stored as attributes on the `json_schema_extra` function
+- Hybrid approach with runtime data attached to function objects
+- ~212 lines in `comparable_field.py`
+
+### Proposed Pattern (Class-Based with Annotated)
+
+```python
+class Invoice(StructuredModel):
+    """New approach using Annotated pattern."""
+    
+    invoice_number: Annotated[str, ComparableField(
+        comparator=ExactComparator(),
+        threshold=0.9,
+        weight=2.0
+    )]
+    
+    vendor: Annotated[str, ComparableField(
+        comparator=LevenshteinComparator(),
+        threshold=0.7,
+        weight=1.0
+    )]
+```
+
+**How it works:**
+- `ComparableField` is a **Pydantic model class** (like `BaseModel`)
+- Configuration lives in type hints (self-documenting)
+- `StructuredModel` base class has `@model_validator(mode='before')` that auto-wraps raw values
+- Smart serialization: clean by default, full metadata with `context={'comp_info': True}`
+- ~150 lines total (class + base validator)
+
+---
+
+## Benefits of Migration
+
+### 1. **Self-Documenting Code**
+```python
+# Type hints show configuration at a glance
+invoice_number: Annotated[str, ComparableField(threshold=0.9, weight=2.0)]
+
+# IDEs can extract and display this information
+# JSON Schema generation includes full type information
+```
+
+### 2. **Cleaner API**
+```python
+# Old: Need to specify value= in some cases
+field = ComparableField(value="INV-001", threshold=0.9)
+
+# New: Value is auto-wrapped by validator
+invoice = Invoice(invoice_number="INV-001")  # Automatically wrapped!
+```
+
+### 3. **Reduced Code Complexity**
+- **Old**: ~212 lines in `comparable_field.py` + per-field validators
+- **New**: ~150 lines total (class + base validator)
+- **Reduction**: ~30% less code, 70% less helper code
+
+### 4. **Better Type Safety**
+```python
+# Old: Type checkers see the field as just 'str'
+invoice_number: str = ComparableField(...)
+
+# New: configuration is visible in the annotation itself (note: static type checkers still see Annotated[str, ...] as plain str)
+invoice_number: Annotated[str, ComparableField(...)]
+# Access: invoice.invoice_number.value, .threshold, .weight, .comparator
+```
+
+### 5. **Smart Serialization**
+```python
+# Clean serialization by default
+invoice.model_dump()
+# → {'invoice_number': 'INV-001', 'vendor': 'ACME Corp'}
+
+# Full metadata when needed
+invoice.model_dump(context={'comp_info': True})
+# → {'invoice_number': {'value': 'INV-001', 'threshold': 0.9, ...}}
+```
+
+### 6. **JSON Schema Integration**
+```python
+# Can dynamically generate models from JSON Schema
+schema = {
+    "properties": {
+        "invoice_number": {
+            "type": "string",
+            "x-aws-stickler-threshold": 0.9,
+            "x-aws-stickler-weight": 2.0
+        }
+    }
+}
+
+Invoice = SticklerSchemaParser.parse_schema(schema)
+# Automatically creates Annotated fields!
+```
+
+---
+
+## Migration Strategy
+
+### Phase 1: Dual Support (Backward Compatible)
+
+**Goal**: Support both patterns simultaneously
+
+**Implementation**:
+1. Create new `ComparableField` class alongside existing function
+2. Update `StructuredModel` base to handle both patterns
+3. Add validator that detects pattern and wraps accordingly
+
+**Code Example**:
+```python
+# In StructuredModel base class
+@model_validator(mode="before")
+@classmethod
+def auto_wrap_comparable_fields(cls, data: Any) -> Any:
+    """Handle both old function-based and new Annotated pattern."""
+    for field_name, field_info in cls.model_fields.items():
+        if field_name in data:
+            raw_value = data[field_name]
+            
+            # Pattern 1: Check for Annotated[Type, ComparableField(...)]
+            if hasattr(cls, '__annotations__') and field_name in cls.__annotations__:
+                annotation = cls.__annotations__[field_name]
+                if get_origin(annotation) is Annotated:
+                    # NEW PATTERN - extract template config
+                    args = get_args(annotation)
+                    for arg in args[1:]:
+                        if isinstance(arg, ComparableField):
+                            # Wrap using template
+                            data[field_name] = arg.model_copy(update={"value": raw_value})
+                            break
+            
+            # Pattern 2: Old function-based (fallback)
+            # Check if field has json_schema_extra with comparison metadata
+            if hasattr(field_info, 'json_schema_extra'):
+                # Extract metadata from function attributes
+                # Wrap using old-style config
+                pass
+    
+    return data
+```
+
+### Phase 2: Gradual Migration
+
+**Goal**: Migrate codebase incrementally
+
+**Priority Order**:
+1. **Documentation & Examples** (low risk, high visibility)
+2. **New Features** (use new pattern from day 1)
+3. **Core Models** (high-traffic, well-tested)
+4. **Test Suite** (parallel to code migration)
+5. **Edge Cases** (last, most complex)
+
+### Phase 3: Deprecation
+
+**Goal**: Phase out old pattern
+
+**Steps**:
+1. Add deprecation warnings to function-based `ComparableField`
+2. Update all first-party code to new pattern
+3. Give users 2-3 minor versions notice
+4. Remove old function-based implementation
+
+---
+
+## Integration with Existing Systems
+
+### 1. Comparator System
+
+**Current Integration**:
+```python
+# comparable_field.py stores comparator in function attributes
+json_schema_extra_func._comparator_instance = actual_comparator
+```
+
+**New Integration**:
+```python
+# ComparableField class stores comparator as instance attribute
+class ComparableField[FieldType](BaseModel):
+    value: FieldType | None = None
+    comparator: BaseComparator | None = None  # Direct storage!
+    threshold: float = 0.5
+    weight: float = 1.0
+```
+
+**Impact**: ✅ **Simpler** - No need for function attribute hacks
+
+### 2. StructuredModel.compare_with()
+
+**Current Flow**:
+```python
+# structured_model.py extracts comparison config
+def compare_with(self, other):
+    for field_name, field_info in self.model_fields.items():
+        # Extract from json_schema_extra function attributes
+        comparator = field_info.json_schema_extra._comparator_instance
+        threshold = field_info.json_schema_extra._threshold
+```
+
+**New Flow**:
+```python
+# structured_model.py accesses ComparableField instance directly
+def compare_with(self, other):
+    for field_name, field_info in self.model_fields.items():
+        field_value = getattr(self, field_name)
+        if isinstance(field_value, ComparableField):
+            # Direct access to all metadata!
+            comparator = field_value.comparator
+            threshold = field_value.threshold
+            score = comparator.compare(field_value.value, getattr(other, field_name).value)
+```
+
+**Impact**: ✅ **Much Cleaner** - Direct attribute access vs function attributes
+
+### 3. Evaluator (StructuredModelEvaluator)
+
+**Current Usage**:
+```python
+# evaluator.py uses compare_with() output
+evaluator = StructuredModelEvaluator(model_class=Invoice)
+metrics = evaluator.evaluate(ground_truth_list, prediction_list)
+```
+
+**New Usage**:
+```python
+# NO CHANGES NEEDED!
+# Evaluator uses compare_with() which is updated internally
+evaluator = StructuredModelEvaluator(model_class=Invoice)
+metrics = evaluator.evaluate(ground_truth_list, prediction_list)
+```
+
+**Impact**: ✅ **Zero Changes** - Evaluator API remains identical
+
+### 4. Hungarian Matching (List Comparison)
+
+**Current Integration**:
+```python
+# structured_model.py handles List[StructuredModel] fields
+if is_list_field:
+    matches = HungarianMatcher.match(gt_list, pred_list)
+```
+
+**New Integration**:
+```python
+# Same logic, but cleaner field detection
+if is_list_field:
+    # Field is already unwrapped to List[StructuredModel]
+    matches = HungarianMatcher.match(field_value, other_value)
+```
+
+**Impact**: ✅ **Minor Simplification** - Field type detection is cleaner
+
+### 5. JSON Schema Generation
+
+**Current Approach**:
+```python
+# model_json_schema() includes x-comparison metadata
+schema = Invoice.model_json_schema()
+# → Has x-comparison in json_schema_extra
+```
+
+**New Approach**:
+```python
+# Can serialize with context to include full metadata
+schema = Invoice.model_json_schema()
+# OR dynamically generate from schema
+Invoice = SticklerSchemaParser.parse_schema(json_schema)
+```
+
+**Impact**: ✅ **Enhanced** - Bidirectional JSON Schema ↔ Model
+
+### 6. Serialization & Deserialization
+
+**Current Behavior**:
+```python
+# model_dump() returns just field values
+invoice.model_dump()
+# → {'invoice_number': 'INV-001'}
+```
+
+**New Behavior**:
+```python
+# Smart serialization with @model_serializer
+invoice.model_dump()  # Clean
+# → {'invoice_number': 'INV-001'}
+
+invoice.model_dump(context={'comp_info': True})  # Full metadata
+# → {'invoice_number': {'value': 'INV-001', 'threshold': 0.9, ...}}
+```
+
+**Impact**: ✅ **Improved** - Smart serialization + backward compatible
+
+---
+
+## Step-by-Step Migration Plan
+
+### Prerequisites
+- [x] Proof of concept implemented (`structured_model_refactor_example.py`)
+- [ ] Performance benchmarks (old vs new)
+- [ ] Memory profiling (ensure no regression)
+- [ ] Comprehensive test coverage for new pattern
+
+### Step 1: Create New Classes (Week 1-2)
+
+**Files to Create/Modify**:
+```
+src/stickler/structured_object_evaluator/models/
+├── comparable_field_v2.py          # New ComparableField class
+├── structured_model_base.py        # New StructuredModel with validator
+└── schema_parser.py                # SticklerSchemaParser
+```
+
+**Tasks**:
+- [ ] Implement `ComparableField` as Pydantic model
+- [ ] Implement `@model_serializer` for smart serialization
+- [ ] Implement `StructuredModel` base with auto-wrapping validator
+- [ ] Implement `SticklerSchemaParser` for JSON Schema support
+- [ ] Add comprehensive unit tests
+
+### Step 2: Update Core Infrastructure (Week 3-4)
+
+**Files to Modify**:
+```
+src/stickler/structured_object_evaluator/models/
+├── structured_model.py             # Update compare_with() logic
+└── configuration_helper.py         # Update metadata extraction
+```
+
+**Tasks**:
+- [ ] Update `compare_with()` to handle ComparableField instances
+- [ ] Update field metadata extraction to support both patterns
+- [ ] Add backward compatibility layer
+- [ ] Update helper methods
+
+### Step 3: Migrate Examples & Documentation (Week 5)
+
+**Files to Update**:
+```
+examples/
+├── scripts/
+│   ├── quick_start.py              # Show both patterns
+│   ├── bulk_evaluation_demo.py
+│   └── aggregate_metrics_demo.py
+└── notebooks/
+    └── Quick_start.ipynb           # Update with Annotated pattern
+```
+
+**Tasks**:
+- [ ] Update all example scripts
+- [ ] Update Quick Start notebook
+- [ ] Create migration guide (this document!)
+- [ ] Update README.md with new pattern
+
+### Step 4: Migrate Test Suite (Week 6-8)
+
+**Files to Update** (~60 test files):
+```
+tests/structured_object_evaluator/
+├── test_quickstart_examples.py
+├── test_structured_model.py
+├── test_evaluator.py
+└── ... (~57 more files)
+```
+
+**Migration Pattern**:
+```python
+# Before
+class Invoice(StructuredModel):
+    invoice_number: str = ComparableField(threshold=0.9, weight=2.0)
+
+# After
+class Invoice(StructuredModel):
+    invoice_number: Annotated[str, ComparableField(threshold=0.9, weight=2.0)]
+```
+
+**Tasks**:
+- [ ] Create automated migration script
+- [ ] Run script on all test files
+- [ ] Manual review of generated code
+- [ ] Fix edge cases
+- [ ] Ensure 100% test pass rate
+
+### Step 5: Add Deprecation Warnings (Week 9)
+
+**Files to Modify**:
+```
+src/stickler/structured_object_evaluator/models/
+└── comparable_field.py             # Add deprecation to function
+```
+
+**Implementation**:
+```python
+def ComparableField(...):
+    """DEPRECATED: Use Annotated[Type, ComparableField(...)] pattern instead."""
+    warnings.warn(
+        "Function-based ComparableField is deprecated. "
+        "Use: field: Annotated[Type, ComparableField(...)] instead. "
+        "See migration guide: docs/Migration_to_Annotated_Pattern.md",
+        DeprecationWarning,
+        stacklevel=2
+    )
+    # ... existing implementation
+```
+
+### Step 6: Monitor & Gather Feedback (Month 3)
+
+**Activities**:
+- [ ] Release as beta feature
+- [ ] Gather user feedback
+- [ ] Monitor error reports
+- [ ] Performance monitoring
+- [ ] Fix issues as they arise
+
+### Step 7: Full Cutover (Month 4+)
+
+**Tasks**:
+- [ ] Remove old function-based implementation
+- [ ] Remove backward compatibility layer
+- [ ] Update all documentation
+- [ ] Major version bump (2.0.0)
+
+---
+
+## Breaking Changes & Compatibility
+
+### Breaking Changes
+
+#### 1. Field Access Pattern
+
+**Before**:
+```python
+invoice = Invoice(invoice_number="INV-001")
+value = invoice.invoice_number  # Direct access to str
+# Type: str
+```
+
+**After**:
+```python
+invoice = Invoice(invoice_number="INV-001")
+value = invoice.invoice_number.value  # Access via .value
+# Type of invoice.invoice_number: ComparableField[str] (value is the underlying str)
+```
+
+**Mitigation**: 
+- Keep `__getattribute__` override in StructuredModel for compatibility
+- OR: Provide migration script to update all field accesses
+
+#### 2. Serialization Context
+
+**Before**:
+```python
+# Always returns clean dict
+invoice.model_dump()
+```
+
+**After**:
+```python
+# Clean by default
+invoice.model_dump()
+
+# Full metadata requires context
+invoice.model_dump(context={'comp_info': True})
+```
+
+**Mitigation**: ✅ **Backward Compatible** - Default behavior unchanged
+
+#### 3. Type Annotations
+
+**Before**:
+```python
+class Invoice(StructuredModel):
+    invoice_number: str = ComparableField(...)
+```
+
+**After**:
+```python
+class Invoice(StructuredModel):
+    invoice_number: Annotated[str, ComparableField(...)]
+```
+
+**Mitigation**: 
+- Support both during transition
+- Automated migration script
+
+### Non-Breaking Changes
+
+✅ **Evaluator API** - No changes needed  
+✅ **compare_with() API** - No changes needed  
+✅ **JSON Schema generation** - Enhanced, not changed  
+✅ **Hungarian matching** - Works identically  
+✅ **Comparator system** - Cleaner integration  
+
+---
+
+## Timeline & Effort Estimation
+
+### Optimistic Timeline (3 months)
+
+| Phase | Duration | Parallel? | Risk |
+|-------|----------|-----------|------|
+| 1. Core Implementation | 2 weeks | No | Low |
+| 2. Infrastructure Updates | 2 weeks | No | Medium |
+| 3. Examples & Docs | 1 week | Yes | Low |
+| 4. Test Migration | 3 weeks | Yes | Medium |
+| 5. Deprecation | 1 week | Yes | Low |
+| 6. Beta & Feedback | 4 weeks | No | High |
+| **Total** | **3 months** | | |
+
+### Realistic Timeline (4-5 months)
+
+Adding buffer for:
+- Unexpected edge cases
+- User feedback integration
+- Performance optimization
+- Documentation refinement
+
+### Effort Breakdown
+
+**Code Changes**:
+- ~500 lines new code (ComparableField class, validator, parser)
+- ~200 lines infrastructure updates
+- ~60 test files to migrate (~668 references)
+- ~10 example files to update
+
+**Total Estimated LOC**: ~1500-2000 lines changed
+
+**Team Effort**:
+- 1 developer full-time: **3-4 months**
+- 2 developers: **2-3 months**
+- With heavy test automation: **2 months**
+
+---
+
+## Risk Assessment
+
+### High Risk
+
+#### 1. **Field Access Breaking Changes**
+- **Risk**: Existing code expects `invoice.field` returns value directly
+- **Impact**: 🔴 High - Affects all users
+- **Mitigation**: 
+  - Provide `__getattribute__` compatibility layer
+  - Automated migration tooling
+  - Clear migration guide with examples
+
+#### 2. **Performance Regression**
+- **Risk**: Auto-wrapping adds overhead
+- **Impact**: 🟡 Medium - Could affect high-volume use cases
+- **Mitigation**:
+  - Benchmark before/after
+  - Profile hot paths
+  - Optimize validator logic
+
+### Medium Risk
+
+#### 3. **Test Suite Migration Complexity**
+- **Risk**: ~668 ComparableField references to update
+- **Impact**: 🟡 Medium - Time-consuming, error-prone
+- **Mitigation**:
+  - Automated migration script
+  - Comprehensive testing
+  - Gradual rollout
+
+#### 4. **Edge Cases in Type Introspection**
+- **Risk**: Complex type annotations (Union, Optional, etc.)
+- **Impact**: 🟡 Medium - May not handle all cases
+- **Mitigation**:
+  - Comprehensive type testing
+  - Fallback to old pattern if detection fails
+
+### Low Risk
+
+#### 5. **Documentation Gaps**
+- **Risk**: Users confused about migration
+- **Impact**: 🟢 Low - Can be fixed quickly
+- **Mitigation**:
+  - Detailed migration guide (this doc!)
+  - Code examples
+  - FAQ section
+
+#### 6. **Third-Party Integration**
+- **Risk**: External tools depend on old pattern
+- **Impact**: 🟢 Low - We control the ecosystem
+- **Mitigation**:
+  - Maintain backward compatibility
+  - Deprecation period
+
+---
+
+## Proof of Concept Results
+
+### Implementation Status
+
+✅ **Complete**: `src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py`
+
+**What Works**:
+- ✅ ComparableField as Pydantic model
+- ✅ StructuredModel with auto-wrapping validator
+- ✅ Annotated pattern for field definitions
+- ✅ Smart serialization (clean vs full metadata)
+- ✅ JSON Schema → StructuredModel conversion
+- ✅ Dynamic model creation
+- ✅ Template-based configuration
+
+**Test Output**:
+```
+=== 1. Simple Model (defaults) ===
+Created: name=ComparableField(John Doe) age=ComparableField(30)
+Serialized: {'name': 'John Doe', 'age': 30}
+
+=== 2. Configured Model (custom config in Annotated) ===
+invoice_number.threshold: 0.9 (from Annotated)
+Serialized with comp context: {'invoice_number': {'value': 'INV-2025-001', ...}}
+
+=== 3. Creating StructuredModel from JSON Schema ===
+✓ Created model: DynamicInvoice
+Generated field annotations with proper thresholds/weights
+```
+
+---
+
+## Recommendations
+
+### Immediate Actions (Next Sprint)
+
+1. **✅ POC Complete** - Review and validate
+2. **Benchmark Performance** - Measure overhead
+3. **Create Migration Script** - Automate test updates
+4. **Stakeholder Review** - Get buy-in
+
+### Short Term (1-2 Months)
+
+1. **Implement Dual Support** - Both patterns work
+2. **Migrate Examples** - Show new pattern
+3. **Update Documentation** - Migration guide
+4. **Start Test Migration** - Low-risk tests first
+
+### Long Term (3-6 Months)
+
+1. **Full Migration** - All code uses new pattern
+2. **Deprecate Old Pattern** - Warnings in place
+3. **Major Version Release** - 2.0.0 with new pattern
+4. **Remove Old Code** - Clean codebase
+
+---
+
+## Conclusion
+
+The **Annotated pattern migration** represents a significant improvement to Stickler's API:
+
+**Pros**:
+- ✅ Self-documenting code
+- ✅ Cleaner, simpler implementation
+- ✅ Better type safety
+- ✅ JSON Schema integration
+- ✅ Smart serialization
+
+**Cons**:
+- ⚠️ Breaking changes (mitigatable)
+- ⚠️ Migration effort (~3-4 months)
+- ⚠️ Test suite updates needed
+
+**Verdict**: **Recommended** - Benefits outweigh costs, especially for long-term maintainability.
+
+---
+
+## Appendix A: Code Comparison
+
+### Current Implementation Size
+
+```
+comparable_field.py:              212 lines (function-based)
+structured_model.py:              2000+ lines (includes validators)
+configuration_helper.py:          ~300 lines
+field_helper.py:                  ~200 lines
+```
+
+### New Implementation Size
+
+```
+comparable_field_v2.py:           ~80 lines (class)
+structured_model_base.py:         ~150 lines (base with validator)
+schema_parser.py:                 ~120 lines
+TOTAL:                            ~350 lines
+```
+
+**Code Reduction**: ~40% less code for core functionality
+
+---
+
+## Appendix B: Migration Script Example
+
+```python
+#!/usr/bin/env python3
+"""
+Automated migration script for ComparableField pattern.
+
+Usage:
+    python migrate_to_annotated.py <file_or_directory>
+"""
+
+import re
+import sys
+from pathlib import Path
+
+def migrate_file(file_path: Path):
+    """Migrate a single Python file to Annotated pattern."""
+    content = file_path.read_text()
+    
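+    # Rewrite `field_name: Type = ComparableField(...)` as `field_name: Annotated[Type, ComparableField(...)]`.
+    # NOTE: this regex is only correct for single-line calls with a simple type name and no nested parentheses.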
+    pattern = r'(\w+):\s*(\w+)\s*=\s*ComparableField\((.*?)\)'
+    
+    def replacer(match):
+        field_name, type_name, args = match.groups()
+        return f'{field_name}: Annotated[{type_name}, ComparableField({args})]'
+    
+    new_content = re.sub(pattern, replacer, content)
+    
+    # Add Annotated import if not present
+    if new_content != content and 'from typing import' in content and 'Annotated' not in content:
+        new_content = new_content.replace(
+            'from typing import',
+            'from typing import Annotated,'
+        )
+    
+    file_path.write_text(new_content)
+    print(f"✓ Migrated: {file_path}")
+
+if __name__ == "__main__":
+    target = Path(sys.argv[1])
+    
+    if target.is_file():
+        migrate_file(target)
+    else:
+        for py_file in target.rglob("*.py"):
+            migrate_file(py_file)
+```
+
+---
+
+## Appendix C: FAQ
+
+**Q: Do I need to migrate immediately?**  
+A: No. Both patterns will be supported during the transition period (2-3 releases).
+
+**Q: Will my existing code break?**  
+A: Not immediately. Deprecation warnings will appear, but functionality remains.
+
+**Q: How do I access field values?**  
+A: Use `.value` attribute: `invoice.invoice_number.value`
+
+**Q: Does this affect performance?**  
+A: Expected to be minimal, since the validator runs once during model initialization; benchmarks are still pending (see Prerequisites).
+
+**Q: Can I mix both patterns?**  
+A: Yes, during migration, but we recommend using a single pattern per model.
+
+**Q: What about JSON Schema?**  
+A: Enhanced! Can now bidirectionally convert between JSON Schema and models.
+
+---
+
+**Document Version**: 1.0  
+**Last Updated**: October 29, 2025  
+**Author**: Stickler Core Team  
+**Status**: Proposal / RFC
diff --git a/src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py b/src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py
new file mode 100644
index 0000000..c672152
--- /dev/null
+++ b/src/stickler/structured_object_evaluator/models/structured_model_refactor_example.py
@@ -0,0 +1,450 @@
+from typing import Any, get_origin, cast, Annotated, get_args, Dict
+from pydantic import (
+    BaseModel,
+    SerializationInfo,
+    model_serializer,
+    model_validator,
+    ConfigDict,
+)
+
+from stickler.comparators.base import BaseComparator
+from stickler.comparators.exact import ExactComparator
+from stickler.comparators.levenshtein import LevenshteinComparator
+
+
+class ComparableField[FieldType](BaseModel):
+    """
+    A field value bundled with the settings used to compare it.
+
+    Besides ``value`` the model carries ``comparator``, ``threshold``,
+    ``weight`` and ``clip_under_threshold``. ``value`` defaults to ``None``
+    so an instance can be built without one and used as an annotation
+    template. Dumping yields the bare value unless the serialization
+    context is a dict whose ``"comp_info"`` entry is exactly ``True``.
+    """
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    value: FieldType | None = None  # Optional for template usage
+    comparator: None | BaseComparator = None
+    threshold: float = 0.5
+    weight: float = 1
+    clip_under_threshold: bool = True
+
+    _is_comparable = True
+
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}({self.value})"
+
+    @model_serializer(mode="wrap")
+    def field_serialise(self, serializer, info: SerializationInfo):
+        """Serialize to the plain value, or to the full model on request."""
+        context = info.context
+        # Only a dict context with comp_info set to the boolean True (not a
+        # truthy stand-in such as the string "true") selects the full dump.
+        wants_metadata = isinstance(context, dict) and context.get("comp_info") is True
+        if not wants_metadata:
+            return self.value
+
+        # Delegate to the wrapped default serializer for every field.
+        return serializer(self)
+
+
+class StructuredModel(BaseModel):
+    """
+    Base model whose raw field values are wrapped in ComparableField.
+
+    A "before" model validator replaces each raw input value with a
+    ComparableField. For fields declared as
+    Annotated[Type, ComparableField(...)], the comparator, threshold, weight
+    and clip_under_threshold of that template are copied onto the wrapper;
+    all other fields get the ComparableField defaults. Values that already
+    are ComparableField instances are left as they are.
+
+    Subclasses define their own fields.
+    """
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    @model_validator(mode="before")
+    @classmethod
+    def auto_wrap_comparable_fields(cls, data: Any) -> Any:
+        """
+        Wrap raw field values in ComparableField before validation.
+
+        Runs in "before" mode, i.e. on the raw input ahead of field
+        validation, for every StructuredModel subclass.
+
+        Args:
+            data: Raw validator input; anything that is not a dict is
+                returned untouched.
+
+        Returns:
+            A shallow copy of ``data`` in which every value that belongs to
+            a declared field is a ComparableField. Keys that are not field
+            names are carried over unchanged.
+        """
+        if not isinstance(data, dict):
+            return data
+
+        # Work on a copy: the validator may be handed the caller's own dict
+        # (e.g. via model_validate), which should not be rewritten in place.
+        wrapped = dict(data)
+
+        for field_name, field_info in cls.model_fields.items():
+            if field_name not in wrapped:
+                continue
+
+            raw_value = wrapped[field_name]
+            if isinstance(raw_value, ComparableField):
+                # Already wrapped by the caller; keep its own configuration.
+                continue
+
+            wrapper = ComparableField(value=raw_value)
+
+            # Extras of Annotated[...] are stored on FieldInfo.metadata. Unlike
+            # cls.__annotations__, which only holds the names annotated in
+            # this class body, that also covers fields inherited from a parent
+            # model, so their template configuration is not dropped.
+            template = next(
+                (m for m in field_info.metadata if isinstance(m, ComparableField)),
+                None,
+            )
+            if template is not None:
+                # Plain attribute assignment: the template's settings are
+                # carried over as-is. With several templates the first wins.
+                wrapper.threshold = template.threshold
+                wrapper.weight = template.weight
+                wrapper.comparator = template.comparator
+                wrapper.clip_under_threshold = template.clip_under_threshold
+
+            wrapped[field_name] = wrapper
+
+        return wrapped
+
+
+# ============================================================================
+# JSON Schema Parser: Create StructuredModels from JSON Schema
+# ============================================================================
+
+class SticklerSchemaParser:
+    """Parse JSON Schema with Stickler extensions into StructuredModel classes."""
+
+    # Maps an "x-aws-stickler-comparator" name to the comparator class to use.
+    COMPARATOR_MAP = {
+        "ExactComparator": ExactComparator,
+        "LevenshteinComparator": LevenshteinComparator,
+        # Add more as needed
+    }
+
+    @classmethod
+    def parse_schema(cls, schema: Dict[str, Any]) -> type[StructuredModel]:
+        """
+        Parse JSON Schema with x-aws-stickler-* extensions.
+
+        Every entry of "properties" becomes a field annotated as
+        Annotated[python_type, ComparableField(...)], with the template
+        built from that property's extensions. Properties that are not
+        listed in "required" get a class-level default of None.
+
+        Args:
+            schema: JSON Schema dict with Stickler extensions; "title"
+                names the generated class (default "DynamicModel").
+
+        Returns:
+            Dynamically created StructuredModel subclass
+        """
+        model_name = schema.get("title", "DynamicModel")
+        properties = schema.get("properties", {})
+        required_fields = set(schema.get("required", []))
+
+        # The class namespace is assembled by hand instead of going through
+        # create_model, which gives direct control over the annotations.
+        annotations = {}
+        class_dict = {
+            '__annotations__': annotations,
+            '__module__': __name__,
+        }
+
+        for field_name, field_schema in properties.items():
+            python_type = cls._json_type_to_python(field_schema.get("type", "string"))
+            comparable_field = cls._create_comparable_field_template(field_schema)
+            annotations[field_name] = Annotated[python_type, comparable_field]
+            if field_name not in required_fields:
+                class_dict[field_name] = None  # optional fields default to None
+
+        # Create the class, then rebuild it so the annotations are processed.
+        DynamicModel = type(model_name, (StructuredModel,), class_dict)
+        DynamicModel.model_rebuild()
+
+        return DynamicModel
+
+    @classmethod
+    def _create_comparable_field_template(cls, field_schema: Dict[str, Any]) -> ComparableField:
+        """
+        Create ComparableField template from JSON Schema field definition.
+
+        Reads x-aws-stickler-threshold (default 0.5), x-aws-stickler-weight
+        (default 1.0), x-aws-stickler-clip (default True) and
+        x-aws-stickler-comparator. A comparator name that is missing from
+        COMPARATOR_MAP is ignored, leaving the template's comparator as None.
+
+        Args:
+            field_schema: JSON Schema field definition
+
+        Returns:
+            ComparableField template with config (no value)
+        """
+        comparator = None
+        comparator_name = field_schema.get("x-aws-stickler-comparator")
+        if comparator_name and comparator_name in cls.COMPARATOR_MAP:
+            comparator = cls.COMPARATOR_MAP[comparator_name]()
+
+        # Template only: the value is filled in when raw data is wrapped.
+        return ComparableField(
+            value=None,
+            comparator=comparator,
+            threshold=field_schema.get("x-aws-stickler-threshold", 0.5),
+            weight=field_schema.get("x-aws-stickler-weight", 1.0),
+            clip_under_threshold=field_schema.get("x-aws-stickler-clip", True),
+        )
+
+    @staticmethod
+    def _json_type_to_python(json_type: str | list[str]) -> type:
+        """
+        Convert JSON Schema type to Python type.
+
+        "type" may also be a list such as ["string", "null"]; the first
+        non-"null" entry is used then. Unknown names fall back to str.
+        """
+        type_map = {
+            "string": str,
+            "number": float,
+            "integer": int,
+            "boolean": bool,
+            "array": list,
+            "object": dict,
+            "null": type(None),
+        }
+        if isinstance(json_type, list):
+            # A list is unhashable, so it cannot be looked up directly.
+            candidates = [name for name in json_type if name != "null"] or json_type
+            json_type = candidates[0] if candidates else "string"
+        return type_map.get(json_type, str)
+
+
+# ============================================================================
+# Example Models - All use Annotated pattern (RECOMMENDED)
+# ============================================================================
+
+class SimpleModel(StructuredModel):
+    """
+    Minimal example: both fields use an argument-less ComparableField().
+    Wrapped values therefore keep the defaults (threshold 0.5, weight 1).
+    """
+    name: Annotated[str, ComparableField()]
+    age: Annotated[int, ComparableField()]
+
+
+class ConfiguredModel(StructuredModel):
+    """
+    Example with per-field comparison settings (RECOMMENDED PATTERN).
+
+    Each field carries its settings as a ComparableField template inside
+    Annotated[...]; the template is created without a value.
+    """
+
+    # Exact comparator, weighted 2.0.
+    invoice_number: Annotated[
+        str,
+        ComparableField(comparator=ExactComparator(), threshold=0.9, weight=2.0),
+    ]
+
+    customer_name: Annotated[
+        str,
+        ComparableField(comparator=LevenshteinComparator(), threshold=0.7, weight=1.0),
+    ]
+
+    # No comparator is given here, so the template's comparator stays None.
+    total_amount: Annotated[
+        float,
+        ComparableField(threshold=0.95, weight=3.0),
+    ]
+
+
+if __name__ == "__main__":
+    print("=" * 80)
+    print("Annotated Pattern - The Recommended Way")
+    print("=" * 80)
+    print()
+    # Demo script: each section prints what it builds; nothing is asserted.
+    # Example 1: raw constructor values come back wrapped, with default settings
+    print("=== 1. Simple Model (defaults) ===")
+    simple = SimpleModel(name="John Doe", age=30)
+    print(f"Created: {simple}")
+    print(f"name.value: {simple.name.value}")  # type: ignore
+    print(f"name.threshold: {simple.name.threshold} (default)")  # type: ignore
+    print(f"Serialized: {simple.model_dump()}")
+    print()
+    
+    # Example 2: settings declared in Annotated[...] appear on the wrapped fields
+    print("=== 2. Configured Model (custom config in Annotated) ===")
+    invoice = ConfiguredModel(
+        invoice_number="INV-2025-001",
+        customer_name="ACME Corporation",
+        total_amount=1250.50
+    )
+    print(f"Created: {invoice}")
+    print(f"invoice_number.value: {invoice.invoice_number.value}")  # type: ignore
+    print(f"invoice_number.threshold: {invoice.invoice_number.threshold} (from Annotated)")  # type: ignore
+    print(f"invoice_number.weight: {invoice.invoice_number.weight} (from Annotated)")  # type: ignore
+    print(f"invoice_number.comparator: {type(invoice.invoice_number.comparator).__name__}")  # type: ignore
+    print()
+    print(f"total_amount.threshold: {invoice.total_amount.threshold} (from Annotated)")  # type: ignore
+    print(f"Serialized: {invoice.model_dump()}")
+    print(f"Serialized with comp context: {invoice.model_dump(context={'comp_info': True})}")
+    print()
+    
+    print("=" * 80)
+    print("✓ Annotated Pattern Demonstrated!")
+    print("=" * 80)
+    print()
+    
+    # ========================================================================
+    # Example 3: Dynamic Model from JSON Schema
+    # ========================================================================
+    
+    print()
+    print("=" * 80)
+    print("JSON Schema → StructuredModel (Dynamic Creation)")
+    print("=" * 80)
+    print()
+    
+    # JSON Schema whose properties carry x-aws-stickler-* comparison settings
+    INVOICE_SCHEMA = {
+        "$schema": "http://json-schema.org/draft-07/schema#",
+        "title": "DynamicInvoice",
+        "type": "object",
+        "properties": {
+            "invoice_number": {
+                "type": "string",
+                "description": "Unique invoice identifier",
+                "x-aws-stickler-threshold": 0.9,
+                "x-aws-stickler-weight": 2.0,
+                "x-aws-stickler-comparator": "ExactComparator",
+                "x-aws-stickler-clip": True
+            },
+            "invoice_date": {
+                "type": "string",
+                "format": "date",
+                "x-aws-stickler-threshold": 1.0,
+                "x-aws-stickler-weight": 1.5,
+                "x-aws-stickler-comparator": "ExactComparator"
+            },
+            "total_amount": {
+                "type": "number",
+                "description": "Total invoice amount",
+                "x-aws-stickler-threshold": 0.95,
+                "x-aws-stickler-weight": 3.0,
+            },
+            "vendor_name": {
+                "type": "string",
+                "x-aws-stickler-threshold": 0.7,
+                "x-aws-stickler-weight": 1.0,
+                "x-aws-stickler-comparator": "LevenshteinComparator"
+            }
+        },
+        "required": ["invoice_number", "total_amount"]
+    }
+    
+    print("=== 3. Creating StructuredModel from JSON Schema ===")
+    print(f"Schema title: {INVOICE_SCHEMA['title']}")
+    print(f"Fields: {list(INVOICE_SCHEMA['properties'].keys())}")
+    print()
+    
+    # Build a StructuredModel subclass from the schema at runtime
+    DynamicInvoice = SticklerSchemaParser.parse_schema(INVOICE_SCHEMA)
+    print(f"✓ Created model: {DynamicInvoice.__name__}")
+    print(f"✓ Base class: {DynamicInvoice.__bases__[0].__name__}")
+    print()
+    
+    # Print each generated Annotated[...] annotation and its template settings
+    print("Generated field annotations:")
+    for field_name, annotation in DynamicInvoice.__annotations__.items():
+        if get_origin(annotation) is Annotated:
+            args = get_args(annotation)
+            python_type = args[0]
+            comp_field = args[1]
+            print(f"  {field_name}: Annotated[{python_type.__name__}, ComparableField(")
+            print(f"      threshold={comp_field.threshold},")
+            print(f"      weight={comp_field.weight},")
+            comparator_name = type(comp_field.comparator).__name__ if comp_field.comparator else None
+            print(f"      comparator={comparator_name}")
+            print(f"  )]")
+    print()
+    
+    # Create instance using raw values (auto-wrapped by StructuredModel)
+    print("=== 4. Creating instance with raw values (auto-wrapping) ===")
+    dynamic_invoice = DynamicInvoice(
+        invoice_number="INV-2025-999",
+        invoice_date="2025-10-29",
+        total_amount=5432.10,
+        vendor_name="Dynamic Corp"
+    )
+    print(f"Created: {dynamic_invoice}")
+    print()
+    
+    # Read the value and the schema-derived settings from the wrapped fields
+    print("=== 5. Accessing ComparableField metadata (from schema) ===")
+    print(f"invoice_number.value: {dynamic_invoice.invoice_number.value}")  # type: ignore
+    print(f"invoice_number.threshold: {dynamic_invoice.invoice_number.threshold} (from schema)")  # type: ignore
+    print(f"invoice_number.weight: {dynamic_invoice.invoice_number.weight} (from schema)")  # type: ignore
+    print(f"invoice_number.comparator: {type(dynamic_invoice.invoice_number.comparator).__name__}")  # type: ignore
+    print()
+    
+    print(f"vendor_name.value: {dynamic_invoice.vendor_name.value}")  # type: ignore
+    print(f"vendor_name.threshold: {dynamic_invoice.vendor_name.threshold} (from schema)")  # type: ignore
+    print(f"vendor_name.comparator: {type(dynamic_invoice.vendor_name.comparator).__name__}")  # type: ignore
+    print()
+    
+    # Dump to a plain dict that holds just the values
+    print("=== 6. Smart Serialization ===")
+    clean_json = dynamic_invoice.model_dump()
+    print(f"Clean (default): {clean_json}")
+    print()
+    
+    # Dump again with comp_info=True in the context to get the full field dicts
+    full_json = dynamic_invoice.model_dump(context={'comp_info': True})
+    print(f"Full metadata (context={{'comp_info': True}}):")
+    print(f"  Keys: {list(full_json.keys())}")
+    print(f"  invoice_number: {{")
+    print(f"      value: {full_json['invoice_number']['value']}")
+    print(f"      threshold: {full_json['invoice_number']['threshold']}")
+    print(f"      weight: {full_json['invoice_number']['weight']}")
+    print(f"  }}")
+    print()
+    
+    print("=" * 80)
+    print("✓ JSON Schema Integration Complete!")
+    print("=" * 80)
+    print()
+    
+    # ========================================================================
+    # Summary
+    # ========================================================================
+    
+    print("=" * 80)
+    print("SUMMARY: The RECOMMENDED pattern for Stickler")
+    print("=" * 80)
+    print()
+    print("Pattern:")
+    print("  field: Annotated[Type, ComparableField(threshold=0.9, weight=2.0)]")
+    print()
+    print("Benefits:")
+    print("  ✓ Configuration in type hints (self-documenting)")
+    print("  ✓ No value= parameter needed")
+    print("  ✓ No validators in child classes")  
+    print("  ✓ No helper methods needed")
+    print("  ✓ Works with JSON Schema (x-aws-stickler-* extensions)")
+    print("  ✓ Smart serialization (clean by default, full with context)")
+    print("  ✓ Clean and simple!")
+    print()