From 69230a39f85a4b63574b71be17ad277bb08ce42f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Dec 2025 01:49:13 +0000
Subject: [PATCH 1/3] Initial plan


From 5c945eb81895dbcea541b05a0e0ef141a6672aa2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Dec 2025 02:00:51 +0000
Subject: [PATCH 2/3] Fix broken tests, lazy-load OpenAI client, and add
 comprehensive project documentation

Co-authored-by: yigagilbert <97894246+yigagilbert@users.noreply.github.com>
---
 docs/PROJECT_ANALYSIS.md           | 422 +++++++++++++++++++++++++++++
 tests/test_api_buffer.py           |   7 +
 tests/test_buffer_functionality.py | 102 ++++---
 tests/test_geospatial_analyzer.py  |  66 +++--
 tests/test_integration.py          |   8 +-
 tests/test_utilities.py            |  76 ++++--
 utils/llm_function_caller.py       |  19 +-
 7 files changed, 602 insertions(+), 98 deletions(-)
 create mode 100644 docs/PROJECT_ANALYSIS.md

diff --git a/docs/PROJECT_ANALYSIS.md b/docs/PROJECT_ANALYSIS.md
new file mode 100644
index 0000000..09d77d6
--- /dev/null
+++ b/docs/PROJECT_ANALYSIS.md
@@ -0,0 +1,422 @@
+# Suntrace GeospatialAnalyzer - Project Analysis
+
+## Overview
+
+**Suntrace** is a multimodal geospatial analysis platform for off-grid electrification planning in Uganda. It integrates GIS data, LLM function calling (via OpenAI and Google Gemini), and interactive web visualization using Leaflet maps with a chat interface.
+
+### Purpose
+The platform helps government officials, planners, and development practitioners make informed decisions about rural energy infrastructure, focusing on:
+- Solar energy potential assessment
+- Mini-grid site evaluation
+- Settlement density analysis
+- Infrastructure connectivity assessment
+
+---
+
+## Architecture
+
+### High-Level Architecture
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│                        Frontend (templates/index.html)           │
+│  ┌─────────────────┐           ┌────────────────────────────────┐│
+│  │   Leaflet Map   │           │       Chat Interface          ││
+│  │  - Draw regions │           │  - Natural language queries   ││
+│  │  - View layers  │           │  - Markdown response display  ││
+│  └─────────────────┘           └────────────────────────────────┘│
+└──────────────────────────────────────────────────────────────────┘
+                                   │
+                                   ▼
+┌──────────────────────────────────────────────────────────────────┐
+│                      FastAPI Backend (main.py)                   │
+│  ┌─────────────────┐           ┌────────────────────────────────┐│
+│  │   Map Routes    │           │       Query Routes            ││
+│  │  /get_map_layers│           │  /query (LLM processing)      ││
+│  │  /              │           │  /health                      ││
+│  └─────────────────┘           └────────────────────────────────┘│
+└──────────────────────────────────────────────────────────────────┘
+                                   │
+                                   ▼
+┌──────────────────────────────────────────────────────────────────┐
+│                    GeospatialService Layer                       │
+│     (app/services/geospatial.py)                                │
+│  - Coordinates map layer retrieval                              │
+│  - Processes natural language queries with LLM                  │
+│  - Converts polygon coordinates to WKT                          │
+└──────────────────────────────────────────────────────────────────┘
+                                   │
+                         ┌─────────┴─────────┐
+                         ▼                   ▼
+┌────────────────────────────────┐  ┌────────────────────────────────┐
+│    GeospatialAnalyzer          │  │    LangGraph Function Caller   │
+│    (utils/GeospatialAnalyzer.py)│  │    (utils/langraph_function_  │
+│                                │  │     caller.py)                 │
+│  - File-based GeoDataFrame ops │  │  - LLM tool calling via       │
+│  - GeoPandas for spatial joins │  │    LangGraph/LangChain        │
+│  - Shapely geometry processing │  │  - Function tools for spatial  │
+│  - NDVI/environmental metrics  │  │    analysis                   │
+└────────────────────────────────┘  └────────────────────────────────┘
+           │                                     │
+           │                                     ▼
+           │                        ┌────────────────────────────────┐
+           │                        │    GeospatialAnalyzer2         │
+           │                        │    (PostGIS-optimized)         │
+           │                        │  - SQL-based spatial queries   │
+           │                        │  - Efficient for scale         │
+           │                        │  - Falls back to GeoPandas     │
+           └───────────────────────►└────────────────────────────────┘
+                                                 │
+                                                 ▼
+                                    ┌────────────────────────────────┐
+                                    │        Data Sources            │
+                                    │  - GeoJSON/GeoPackage files    │
+                                    │  - PostGIS database            │
+                                    │  - CSV tile statistics         │
+                                    └────────────────────────────────┘
+```
+
+---
+
+## Core Components
+
+### 1. GeospatialAnalyzer (File-Based)
+**Location:** `utils/GeospatialAnalyzer.py`
+
+The primary analyzer class that loads and processes geospatial data using GeoPandas. It provides:
+
+- **Data Loading**: Reads GeoJSON, GeoPackage, and CSV files
+- **Spatial Operations**: Intersection, buffering, distance calculations
+- **Environmental Metrics**: NDVI, elevation, slope, rainfall analysis
+- **Settlement Analysis**: Building counts, village information
+- **Infrastructure Analysis**: Roads, grid infrastructure, minigrids
+
+**Key Methods:**
+```python
+count_features_within_region(region, layer_name, filter_expr)
+analyze_region(region)  # Comprehensive region analysis
+weighted_tile_stats_all(region)  # Environmental metrics
+compute_distance_to_grid(geometry)
+buffer_geometry(geom, radius_m)
+```
+
+### 2. GeospatialAnalyzer2 (PostGIS-Optimized)
+**Location:** `utils/GeospatialAnalyzer2.py`
+
+An optimized version that pushes spatial operations to PostGIS when available:
+
+- **SQL-based Queries**: Uses ST_Intersects, ST_Area, etc.
+- **Spatial Indexing**: Creates GiST indexes for performance
+- **Lazy Loading**: Caches results for repeated queries
+- **Fallback**: Falls back to GeoPandas when DB unavailable
+
+### 3. Factory Pattern
+**Location:** `utils/factory.py`
+
+Creates the appropriate analyzer based on environment:
+```python
+def create_geospatial_analyzer():
+    if use_postgis:
+        return GeospatialAnalyzer2(database_uri=uri)
+    else:
+        return GeospatialAnalyzer(file_paths...)
+```
+
+### 4. LLM Integration
+
+**LangGraph Agent** (`utils/langraph_function_caller.py`):
+- Uses LangChain and LangGraph for tool-calling
+- Supports Google Gemini and OpenAI models
+- Maintains conversation state with InMemorySaver
+- Provides tools for spatial analysis
+
+**Legacy OpenAI Client** (`utils/llm_function_caller.py`):
+- Direct OpenAI API integration
+- Manual tool dispatch and response handling
+
+---
+
+## Data Layers
+
+The analyzer supports these geospatial layers:
+
+| Layer | Description | Geometry Type |
+|-------|-------------|---------------|
+| `buildings` | Building footprints | Polygon |
+| `tiles` | Analysis grid tiles with statistics | Polygon |
+| `roads` | Road network | LineString |
+| `villages` | Village boundaries | Polygon |
+| `parishes` | Parish boundaries | Polygon |
+| `subcounties` | Subcounty boundaries | Polygon |
+| `existing_grid` | Existing electricity grid | LineString |
+| `grid_extension` | Proposed grid extensions | LineString |
+| `candidate_minigrids` | Candidate minigrid sites | Polygon/Point |
+| `existing_minigrids` | Existing minigrids | Polygon/Point |
+
+---
+
+## Strengths 💪
+
+### 1. **Well-Structured Architecture**
+- Clear separation of concerns (API, services, core, utilities)
+- Factory pattern for flexible analyzer instantiation
+- Dependency injection with FastAPI
+
+### 2. **Dual Backend Support**
+- File-based GeoPandas for development/small datasets
+- PostGIS optimization for production scale
+- Automatic fallback mechanism
+
+### 3. **LLM Integration**
+- Natural language interface for geospatial queries
+- Tool-calling with function schemas
+- Support for multiple LLM providers (OpenAI, Google Gemini)
+
+### 4. **Comprehensive Analysis Capabilities**
+- Environmental metrics (NDVI, elevation, rainfall)
+- Settlement analysis (buildings, villages)
+- Infrastructure analysis (roads, grid, minigrids)
+- Distance calculations and buffering
+
+### 5. **Interactive Frontend**
+- Leaflet-based map with drawing tools
+- Chat interface with markdown rendering
+- Layer controls and legends
+- Responsive design with Bootstrap
+
+### 6. **Container-Ready**
+- Dockerfile for easy deployment
+- Docker Compose support
+- Google Cloud Run deployment scripts
+
+### 7. **Good Documentation**
+- Comprehensive README with setup instructions
+- Testing guide (TESTING.md)
+- Docstrings on major functions
+
+---
+
+## Weaknesses 🔧
+
+### 1. **Outdated/Broken Tests**
+The test suite has issues:
+```python
+# Test uses old signature (minigrids_path)
+analyzer = GeospatialAnalyzer(
+    buildings_path="mock.gpkg",
+    minigrids_path="mock.gpkg",  # This parameter no longer exists!
+    ...
+)
+```
+The `GeospatialAnalyzer.__init__` signature was updated but tests weren't.
+
+### 2. **Hard-Coded API Key Initialization**
+In `utils/llm_function_caller.py`:
+```python
+client = OpenAI()  # Fails if OPENAI_API_KEY not set
+```
+This causes import errors even when using Gemini.
+
+### 3. **Missing Data Handling**
+- Tests require actual data files that aren't in the repo
+- No mock data or fixtures for unit testing
+- Integration tests skip silently when data is missing
+
+### 4. **Incomplete Error Handling**
+Some methods return empty results silently:
+```python
+if gdf.empty:
+    return {}  # No indication of why it's empty
+```
+
+### 5. **Mixed CRS Handling**
+- Multiple places handle CRS conversion differently
+- Some methods assume EPSG:4326, others don't
+- Potential for coordinate system mismatches
+
+### 6. **Memory Concerns**
+- `get_layer_geojson()` can load large datasets into memory
+- Building layer sampling helps but may not be sufficient for all layers
+
+### 7. **Dual LLM Systems**
+Two different LLM integration systems exist:
+- `llm_function_caller.py` (OpenAI direct)
+- `langraph_function_caller.py` (LangGraph/LangChain)
+
+The app uses LangGraph but the legacy file still exists and causes issues.
+
+### 8. **PostGIS Dependency Complexity**
+- Requires ogr2ogr for data loading
+- Complex setup for local development
+- Database schema must match expected table names
+
+---
+
+## How It Works
+
+### User Flow
+
+1. **User opens web interface** → Loads `templates/index.html`
+2. **Map initializes** → Fetches `/get_map_layers` for GeoJSON data
+3. **User draws polygon** → Stores coordinates in JavaScript
+4. **User asks question** → POST to `/query` with question + polygon
+5. **LLM processes query** → LangGraph agent with geospatial tools
+6. **Tools execute** → Analyzer performs spatial analysis
+7. **Response rendered** → Markdown parsed and displayed
+
+### Query Processing Flow
+
+```
+User Query + Polygon WKT
+         │
+         ▼
+┌─────────────────────────┐
+│  GeospatialService      │
+│  process_query()        │
+│  - Validate polygon     │
+│  - Convert to WKT       │
+│  - Call LLM             │
+└─────────────────────────┘
+         │
+         ▼
+┌─────────────────────────┐
+│  LangGraph Agent        │
+│  ask_with_functions()   │
+│  - Parse query          │
+│  - Select tools         │
+│  - Execute analysis     │
+└─────────────────────────┘
+         │
+         ▼
+┌─────────────────────────┐
+│  GeospatialAnalyzer     │
+│  analyze_region()       │
+│  count_features()       │
+│  weighted_tile_stats()  │
+└─────────────────────────┘
+         │
+         ▼
+   Analysis Results
+         │
+         ▼
+┌─────────────────────────┐
+│  LLM Final Response     │
+│  - Format results       │
+│  - Generate insights    │
+│  - Return markdown      │
+└─────────────────────────┘
+```
+
+---
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `OPENAI_API_KEY` | OpenAI API key | Required for OpenAI |
+| `GOOGLE_API_KEY` | Google Gemini API key | Required for Gemini |
+| `SUNTRACE_USE_POSTGIS` | Enable PostGIS backend | `false` |
+| `SUNTRACE_DATABASE_URI` | PostGIS connection string | Local default |
+| `LANGSMITH_API_KEY` | LangSmith tracing | Optional |
+
+### File Paths (configs/paths.py)
+
+Data files are expected in the `data/` directory:
+- Buildings, roads, villages, etc. in `data/viz_geojsons/`
+- Tile statistics in `data/Lamwo_Tile_Stats_EE_biomass.csv`
+- Grid tiles in `data/lamwo_sentinel_composites/`
+
+---
+
+## Testing
+
+### Run Tests
+```bash
+make test              # Unit tests (non-slow, non-integration)
+make test-all          # All tests
+make test-integration  # Integration tests with data
+make test-coverage     # With coverage report
+```
+
+### Current Test Status
+- **Passing**: Basic validation, CRS handling, parametrized tests
+- **Failing**: Tests using outdated `GeospatialAnalyzer` signature
+- **Skipped**: Integration tests (missing data files)
+
+---
+
+## Recommendations
+
+### Immediate Fixes
+
+1. **Fix test signatures** - Update tests to use new `GeospatialAnalyzer` parameters
+2. **Lazy OpenAI client** - Don't initialize at import time
+3. **Add mock data** - Create fixtures for unit tests
+
+### Short-Term Improvements
+
+1. **Consolidate LLM integrations** - Use only LangGraph, remove legacy
+2. **Add validation layer** - Validate inputs before processing
+3. **Improve error messages** - Return specific errors, not empty results
+
+### Long-Term Enhancements
+
+1. **Add caching layer** - Cache frequent spatial queries
+2. **Streaming responses** - Use LangGraph streaming for real-time updates
+3. **User sessions** - Persist conversation state
+4. **Multi-region support** - Extend beyond Lamwo district
+
+---
+
+## File Structure Reference
+
+```
+suntrace/
+├── main.py                 # FastAPI app entry point
+├── app/
+│   ├── api/
+│   │   ├── deps.py         # Dependency injection, app factory
+│   │   └── routes/
+│   │       ├── map.py      # Map layer endpoints
+│   │       └── query.py    # Query processing endpoints
+│   ├── core/
+│   │   ├── config.py       # Settings with Pydantic
+│   │   ├── exceptions.py   # Custom exceptions
+│   │   ├── logger.py       # Logging setup
+│   │   └── middleware.py   # Request logging
+│   ├── models/
+│   │   └── schemas.py      # Pydantic request/response models
+│   └── services/
+│       └── geospatial.py   # Main service class
+├── utils/
+│   ├── GeospatialAnalyzer.py   # File-based analyzer
+│   ├── GeospatialAnalyzer2.py  # PostGIS-optimized analyzer
+│   ├── factory.py              # Analyzer factory
+│   ├── langraph_function_caller.py  # LangGraph LLM integration
+│   ├── llm_function_caller.py      # Legacy OpenAI integration
+│   └── tools.py                    # Tool definitions
+├── configs/
+│   ├── paths.py           # Data file paths
+│   ├── stats.py           # Statistics column definitions
+│   └── system_prompt.py   # LLM system prompt
+├── templates/
+│   └── index.html         # Frontend with Leaflet + chat
+├── tests/                 # Test suite
+├── data/                  # Geospatial data (gitignored)
+└── scripts/               # Utility scripts
+```
+
+---
+
+## Conclusion
+
+Suntrace is a well-designed geospatial analysis platform with solid architecture and comprehensive functionality. The main areas needing attention are:
+
+1. **Test maintenance** - Update tests to match current API
+2. **LLM consolidation** - Remove legacy integration
+3. **Error handling** - Improve user feedback
+4. **Documentation** - Keep in sync with code changes
+
+The project demonstrates excellent use of modern Python patterns (dependency injection, factory pattern, Pydantic validation) and provides a powerful natural language interface for complex geospatial analysis.
diff --git a/tests/test_api_buffer.py b/tests/test_api_buffer.py
index c03049f..8000377 100644
--- a/tests/test_api_buffer.py
+++ b/tests/test_api_buffer.py
@@ -1,10 +1,16 @@
 #!/usr/bin/env python3
 """
 Simple API test for buffer functionality to verify the Flask endpoints work.
+
+This is a manual test script that requires a running server.
+Run manually with: python tests/test_api_buffer.py
+
+When run via pytest, it will skip since no server is available.
 """
 
 import json
 
+import pytest
 import requests
 from shapely.geometry import Point
 from shapely.wkt import dumps as wkt_dumps
@@ -12,6 +18,7 @@
 BASE_URL = "http://127.0.0.1:5000"
 
 
+@pytest.mark.skip(reason="Requires running server - run manually with python tests/test_api_buffer.py")
 def test_api_buffer():
     """Test buffer functionality through API endpoints"""
     print("🌐 Testing Buffer API Endpoints")
diff --git a/tests/test_buffer_functionality.py b/tests/test_buffer_functionality.py
index b920157..cf9ea23 100644
--- a/tests/test_buffer_functionality.py
+++ b/tests/test_buffer_functionality.py
@@ -1,25 +1,35 @@
 #!/usr/bin/env python3
 """
 Test script to verify buffer_geometry functionality works through the LLM interface.
+
+This test requires data files to be present. It will skip if data files are not available.
 """
 
 import os
 import sys
 
-sys.path.append(os.path.join(os.path.dirname(__file__)))
+import pytest
 
-from shapely.geometry import Point
-from utils.factory import create_geospatial_analyzer
-from utils.llm_function_caller import ask_with_functions
+sys.path.append(os.path.join(os.path.dirname(__file__)))
 
 
+@pytest.mark.integration
 def test_buffer_functionality():
     """Test the buffer functionality through the LLM interface."""
+    from shapely.geometry import Point
+    from utils.factory import create_geospatial_analyzer
 
     print("🧪 Testing buffer_geometry functionality...")
 
-    # Create analyzer
-    analyzer = create_geospatial_analyzer()
+    # Try to create analyzer - this may fail if data files are missing
+    try:
+        analyzer = create_geospatial_analyzer()
+    except Exception as e:
+        pytest.skip(f"Could not create analyzer (likely missing data files): {e}")
+
+    # Check if the analyzer has valid data
+    if analyzer._buildings_gdf.empty:
+        pytest.skip("No buildings data loaded - skipping test")
 
     # Test 1: Test buffer_geometry method directly
     print("\n1. Testing buffer_geometry method directly...")
@@ -27,44 +37,58 @@ def test_buffer_functionality():
     try:
         buffered = analyzer.buffer_geometry(test_point, 1000)  # 1km buffer
         print(f"✅ Direct buffer test successful: {type(buffered)}")
+        assert buffered is not None
+        assert hasattr(buffered, 'area')
+        assert buffered.area > 0
     except Exception as e:
-        print(f"❌ Direct buffer test failed: {e}")
-        return False
+        pytest.fail(f"Direct buffer test failed: {e}")
 
-    # Test 2: Test through LLM interface
-    print("\n2. Testing through LLM interface...")
+    print("✅ Buffer functionality test passed!")
 
-    # Note: For this test to work, you need OpenAI API key set
-    if not os.environ.get("OPENAI_API_KEY"):
-        print("⚠️  Skipping LLM test - OPENAI_API_KEY not set")
-        print("✅ Core buffer functionality works!")
-        return True
 
+if __name__ == "__main__":
+    # When run directly, call the test function but handle missing data gracefully
+    from shapely.geometry import Point
+    
+    print("🧪 Testing buffer_geometry functionality...")
+    
     try:
-        query = f"Create a 500 meter buffer around this point: {test_point.wkt}"
-        response = ask_with_functions(query, analyzer)
-        print(f"✅ LLM buffer test successful: {response[:100]}...")
+        from utils.factory import create_geospatial_analyzer
+        analyzer = create_geospatial_analyzer()
     except Exception as e:
-        print(f"❌ LLM buffer test failed: {e}")
-        # Don't fail the test if it's just an API issue
-        if "openai" in str(e).lower() or "api" in str(e).lower():
-            print("✅ Core buffer functionality works! (LLM API issue)")
-            return True
-        return False
-
-    print("✅ All buffer tests passed!")
-    return True
-
+        print(f"⚠️ Could not create analyzer: {e}")
+        print("✅ Core imports work. Data files may be missing.")
+        sys.exit(0)
+    
+    # Test buffer_geometry method directly
+    print("\n1. Testing buffer_geometry method directly...")
+    test_point = Point(32.8, 3.16)  # Point in Lamwo, Uganda
+    try:
+        buffered = analyzer.buffer_geometry(test_point, 1000)  # 1km buffer
+        print(f"✅ Direct buffer test successful: {type(buffered)}")
+    except Exception as e:
+        print(f"❌ Direct buffer test failed: {e}")
+        sys.exit(1)
 
-if __name__ == "__main__":
-    success = test_buffer_functionality()
-    if success:
-        print("\n🎉 Buffer functionality is ready for frontend integration!")
-        print("\n📋 Next steps for full end-to-end workflow:")
-        print("   1. Frontend feature selection (click on minigrids)")
-        print("   2. Buffer visualization on map")
-        print("   3. Interactive buffer queries through chat")
-        print("   4. Session management for selected features")
+    # Test 2: Test through LLM interface (optional)
+    print("\n2. Testing through LLM interface...")
+    
+    if not os.environ.get("OPENAI_API_KEY") and not os.environ.get("GOOGLE_API_KEY"):
+        print("⚠️  Skipping LLM test - No API key set")
+        print("✅ Core buffer functionality works!")
     else:
-        print("\n❌ Buffer functionality needs fixes before frontend integration")
-        sys.exit(1)
+        try:
+            from utils.langraph_function_caller import ask_with_functions
+            query = f"Create a 500 meter buffer around this point: {test_point.wkt}"
+            response = ask_with_functions(query, analyzer)
+            print(f"✅ LLM buffer test successful: {response[:100]}...")
+        except Exception as e:
+            print(f"⚠️ LLM buffer test skipped: {e}")
+            print("✅ Core buffer functionality works! (LLM API issue)")
+
+    print("\n🎉 Buffer functionality is ready for frontend integration!")
+    print("\n📋 Next steps for full end-to-end workflow:")
+    print("   1. Frontend feature selection (click on minigrids)")
+    print("   2. Buffer visualization on map")
+    print("   3. Interactive buffer queries through chat")
+    print("   4. Session management for selected features")
diff --git a/tests/test_geospatial_analyzer.py b/tests/test_geospatial_analyzer.py
index 34c7f4a..123c7df 100644
--- a/tests/test_geospatial_analyzer.py
+++ b/tests/test_geospatial_analyzer.py
@@ -1,3 +1,12 @@
+"""
+Manual test script for GeospatialAnalyzer (requires data files).
+
+This script is designed to be run directly, not through pytest.
+It requires the sample data files to be present in the data/ directory.
+
+Usage:
+    python tests/test_geospatial_analyzer.py
+"""
 import os
 import sys
 
@@ -6,28 +15,35 @@
 
 # Add the src directory to the Python path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from utils.factory import create_geospatial_analyzer
-
-# Add the project root (one level up) to the Python path for configs
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from configs.paths import SAMPLE_REGION_PATH
-
-# Define a sample region for testing (you may need to adjust coordinates)
-# These coordinates are arbitrary and likely outside your actual data extent.
-# You should use coordinates that are relevant to your data for meaningful tests.
-# using shapely open shapefile from path data/sample_region_mudu/5502888.shp
 
-# Load the sample polygon from the shapefile
 
-sample_gdf = gpd.read_file(SAMPLE_REGION_PATH)
-if sample_gdf.empty:
-    raise ValueError(
-        f"Sample region shapefile at {SAMPLE_REGION_PATH} is empty or failed to load."
-    )
-sample_polygon = sample_gdf.geometry.iloc[0]
+def get_sample_polygon():
+    """Load sample polygon from file, return None if file doesn't exist."""
+    from configs.paths import SAMPLE_REGION_PATH
+    
+    if not os.path.exists(SAMPLE_REGION_PATH):
+        print(f"Warning: Sample region file not found at {SAMPLE_REGION_PATH}")
+        return None
+    
+    sample_gdf = gpd.read_file(SAMPLE_REGION_PATH)
+    if sample_gdf.empty:
+        print(f"Warning: Sample region shapefile at {SAMPLE_REGION_PATH} is empty.")
+        return None
+    
+    return sample_gdf.geometry.iloc[0]
 
 
 def run_tests():
+    """Run manual tests for the GeospatialAnalyzer."""
+    from utils.factory import create_geospatial_analyzer
+    
+    # Load sample polygon
+    sample_polygon = get_sample_polygon()
+    if sample_polygon is None:
+        print("Error: Cannot run tests without sample polygon data.")
+        print("Please ensure data/sample_region_mudu/mudu_village.gpkg exists.")
+        return
+    
     print("Initializing GeospatialAnalyzer...")
     try:
         analyzer = create_geospatial_analyzer()
@@ -48,12 +64,12 @@ def run_tests():
     else:
         print("  Buildings GDF is empty or failed to load.")
 
-    if not analyzer._minigrids_gdf.empty:
+    if not analyzer._candidate_minigrids_gdf.empty:
         print(
-            f"  Minigrids loaded: {len(analyzer._minigrids_gdf)} features. CRS: {analyzer._minigrids_gdf.crs}"
+            f"  Candidate Minigrids loaded: {len(analyzer._candidate_minigrids_gdf)} features. CRS: {analyzer._candidate_minigrids_gdf.crs}"
         )
     else:
-        print("  Minigrids GDF is empty or failed to load.")
+        print("  Candidate Minigrids GDF is empty or failed to load.")
 
     if not analyzer._plain_tiles_gdf.empty:
         print(
@@ -180,15 +196,15 @@ def run_tests():
     except Exception as e:
         print(f"  Error counting buildings: {e}")
 
-    # 2. Count minigrids
-    print("\n2. count_features_within (minigrids):")
+    # 2. Count candidate minigrids
+    print("\n2. count_features_within (candidate_minigrids):")
     try:
         minigrid_count = analyzer.count_features_within_region(
-            test_region_polygon, "minigrids"
+            test_region_polygon, "candidate_minigrids"
         )
-        print(f"  Number of minigrids in sample region: {minigrid_count}")
+        print(f"  Number of candidate minigrids in sample region: {minigrid_count}")
     except Exception as e:
-        print(f"  Error counting minigrids: {e}")
+        print(f"  Error counting candidate minigrids: {e}")
 
     # 3. Count tiles
     print("\n3. count_features_within (tiles):")
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 2b86ec4..55cd946 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -199,14 +199,14 @@ def test_coordinate_system_consistency(self, analyzer_with_data):
         target_crs = "EPSG:4326"  # WGS84
         
         if hasattr(analyzer_with_data, '_buildings_gdf') and not analyzer_with_data._buildings_gdf.empty:
-            buildings_4326 = analyzer_with_data._ensure_gdf_crs_for_calculation(
+            buildings_4326 = analyzer_with_data._check_and_reproject_gdf(
                 analyzer_with_data._buildings_gdf.copy(), target_crs
             )
             assert buildings_4326.crs.to_string() == target_crs
             
-        if hasattr(analyzer_with_data, '_minigrids_gdf') and not analyzer_with_data._minigrids_gdf.empty:
-            minigrids_4326 = analyzer_with_data._ensure_gdf_crs_for_calculation(
-                analyzer_with_data._minigrids_gdf.copy(), target_crs
+        if hasattr(analyzer_with_data, '_candidate_minigrids_gdf') and not analyzer_with_data._candidate_minigrids_gdf.empty:
+            minigrids_4326 = analyzer_with_data._check_and_reproject_gdf(
+                analyzer_with_data._candidate_minigrids_gdf.copy(), target_crs
             )
             assert minigrids_4326.crs.to_string() == target_crs
 
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
index 0f541fe..8c3a949 100644
--- a/tests/test_utilities.py
+++ b/tests/test_utilities.py
@@ -30,9 +30,16 @@ def mock_minimal_analyzer(self):
 
                 analyzer = GeospatialAnalyzer(
                     buildings_path="mock.gpkg",
-                    minigrids_path="mock.gpkg",
                     tile_stats_path="mock.csv",
                     plain_tiles_path="mock.geojson",
+                    candidate_minigrids_path="mock.gpkg",
+                    existing_minigrids_path="mock.gpkg",
+                    existing_grid_path="mock.gpkg",
+                    grid_extension_path="mock.gpkg",
+                    roads_path="mock.gpkg",
+                    villages_path="mock.gpkg",
+                    parishes_path="mock.gpkg",
+                    subcounties_path="mock.gpkg",
                 )
                 return analyzer
 
@@ -41,13 +48,12 @@ def test_buffer_geometry_with_geographic_crs(self, mock_minimal_analyzer):
         # Create a point in geographic coordinates
         point = Point(-0.5, 0.5)  # Lon, Lat
 
-        # Mock the geometry to have geographic CRS
-        with patch.object(point, "crs", "EPSG:4326"):
-            buffered = mock_minimal_analyzer.buffer_geometry(point, 1000)  # 1km buffer
+        # The buffer_geometry method handles CRS conversion internally
+        buffered = mock_minimal_analyzer.buffer_geometry(point, 1000)  # 1km buffer
 
-            assert buffered is not None
-            assert hasattr(buffered, "area")
-            assert buffered.area > 0
+        assert buffered is not None
+        assert hasattr(buffered, "area")
+        assert buffered.area > 0
 
     def test_buffer_geometry_without_crs(self, mock_minimal_analyzer):
         """Test buffering without CRS information."""
@@ -59,8 +65,8 @@ def test_buffer_geometry_without_crs(self, mock_minimal_analyzer):
         assert hasattr(buffered, "area")
         assert buffered.area > 0
 
-    def test_ensure_gdf_crs_for_calculation(self, mock_minimal_analyzer):
-        """Test CRS ensuring for GeoDataFrames."""
+    def test_check_and_reproject_gdf(self, mock_minimal_analyzer):
+        """Test CRS reprojection for GeoDataFrames."""
         # Create test GeoDataFrame
         test_gdf = gpd.GeoDataFrame(
             {"geometry": [Point(0, 0), Point(1, 1)]}, crs="EPSG:4326"
@@ -68,39 +74,37 @@ def test_ensure_gdf_crs_for_calculation(self, mock_minimal_analyzer):
 
         target_crs = "EPSG:32636"  # UTM Zone 36N
 
-        result_gdf = mock_minimal_analyzer._ensure_gdf_crs_for_calculation(
+        result_gdf = mock_minimal_analyzer._check_and_reproject_gdf(
             test_gdf, target_crs
         )
 
         assert result_gdf.crs.to_string() == target_crs
         assert len(result_gdf) == len(test_gdf)
 
-    def test_ensure_crs_for_calculation_same_crs(self, mock_minimal_analyzer):
-        """Test CRS ensuring when geometry is already in target CRS."""
-        # Create geometry that's already in target CRS
-        target_crs = "EPSG:32636"
-        test_geom = gpd.GeoSeries([Point(0, 0)], crs=target_crs).iloc[0]
+    def test_prepare_geometry_for_crs_same_crs(self, mock_minimal_analyzer):
+        """Test geometry preparation when in same CRS."""
+        # Create geometry - prepare_geometry_for_crs assumes input is in geographic CRS
+        source_geom = Point(0, 0)
+        target_crs = mock_minimal_analyzer.target_geographic_crs
 
-        result_geom, was_reprojected = (
-            mock_minimal_analyzer._ensure_crs_for_calculation(test_geom, target_crs)
+        result_series, was_reprojected = (
+            mock_minimal_analyzer._prepare_geometry_for_crs(source_geom, target_crs)
         )
 
         assert not was_reprojected
-        assert result_geom is not None
+        assert result_series is not None
 
-    def test_ensure_crs_for_calculation_different_crs(self, mock_minimal_analyzer):
-        """Test CRS ensuring when geometry needs reprojection."""
-        source_crs = "EPSG:4326"
-        target_crs = "EPSG:32636"
-
-        test_geom = gpd.GeoSeries([Point(0, 0)], crs=source_crs).iloc[0]
+    def test_prepare_geometry_for_crs_different_crs(self, mock_minimal_analyzer):
+        """Test geometry preparation when reprojection is needed."""
+        source_geom = Point(0, 0)
+        target_crs = "EPSG:32636"  # UTM Zone 36N
 
-        result_geom, was_reprojected = (
-            mock_minimal_analyzer._ensure_crs_for_calculation(test_geom, target_crs)
+        result_series, was_reprojected = (
+            mock_minimal_analyzer._prepare_geometry_for_crs(source_geom, target_crs)
         )
 
         assert was_reprojected
-        assert result_geom is not None
+        assert result_series is not None
 
 
 class TestGeospatialAnalyzerErrorHandling:
@@ -130,9 +134,16 @@ def test_count_features_empty_gdf(self):
 
                 analyzer = GeospatialAnalyzer(
                     buildings_path="mock.gpkg",
-                    minigrids_path="mock.gpkg",
                     tile_stats_path="mock.csv",
                     plain_tiles_path="mock.geojson",
+                    candidate_minigrids_path="mock.gpkg",
+                    existing_minigrids_path="mock.gpkg",
+                    existing_grid_path="mock.gpkg",
+                    grid_extension_path="mock.gpkg",
+                    roads_path="mock.gpkg",
+                    villages_path="mock.gpkg",
+                    parishes_path="mock.gpkg",
+                    subcounties_path="mock.gpkg",
                 )
 
                 test_region = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
@@ -156,9 +167,16 @@ def test_invalid_filter_expression(self):
 
                 analyzer = GeospatialAnalyzer(
                     buildings_path="mock.gpkg",
-                    minigrids_path="mock.gpkg",
                     tile_stats_path="mock.csv",
                     plain_tiles_path="mock.geojson",
+                    candidate_minigrids_path="mock.gpkg",
+                    existing_minigrids_path="mock.gpkg",
+                    existing_grid_path="mock.gpkg",
+                    grid_extension_path="mock.gpkg",
+                    roads_path="mock.gpkg",
+                    villages_path="mock.gpkg",
+                    parishes_path="mock.gpkg",
+                    subcounties_path="mock.gpkg",
                 )
 
                 test_region = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
diff --git a/utils/llm_function_caller.py b/utils/llm_function_caller.py
index 3964a20..3ec17e1 100644
--- a/utils/llm_function_caller.py
+++ b/utils/llm_function_caller.py
@@ -13,8 +13,22 @@
 from configs.system_prompt import SYSTEM_PROMPT
 
 
+# Lazy initialization of OpenAI client
+_client = None
 
-client = OpenAI()
+
+def get_openai_client():
+    """Get or create OpenAI client lazily."""
+    global _client
+    if _client is None:
+        api_key = os.environ.get("OPENAI_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "OPENAI_API_KEY environment variable is not set. "
+                "Please set it to use the OpenAI-based function caller."
+            )
+        _client = OpenAI()
+    return _client
 
 
 # ── 3) dispatcher ───────────────────────────────────────────────────────────
@@ -80,6 +94,9 @@ def ask_with_functions(user_prompt, analyzer=None):
 
         #print("Debug: Making first API call...")
 
+        # Get the OpenAI client lazily
+        client = get_openai_client()
+
         # First call: let the model decide if it needs functions
         response = client.chat.completions.create(
             model="gpt-4o-mini",

From 0e40a4318414ffccfb8bff3517d3930e104aa267 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Dec 2025 02:04:10 +0000
Subject: [PATCH 3/3] Address code review feedback: thread-safe OpenAI client,
 clearer mock paths, improved documentation

Co-authored-by: yigagilbert <97894246+yigagilbert@users.noreply.github.com>
---
 docs/PROJECT_ANALYSIS.md           | 39 ++++++++++++++++++++++++------
 tests/test_buffer_functionality.py | 11 +++++++--
 tests/test_utilities.py            | 31 +++++++++++++++---------
 utils/llm_function_caller.py       | 23 +++++++++++-------
 4 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/docs/PROJECT_ANALYSIS.md b/docs/PROJECT_ANALYSIS.md
index 09d77d6..cc20f05 100644
--- a/docs/PROJECT_ANALYSIS.md
+++ b/docs/PROJECT_ANALYSIS.md
@@ -198,24 +198,47 @@ The analyzer supports these geospatial layers:
 
 ## Weaknesses 🔧
 
-### 1. **Outdated/Broken Tests**
-The test suite has issues:
+### 1. **~~Outdated/Broken Tests~~ (FIXED)**
+The test suite previously had issues with outdated constructor signatures.
+
+**Original Issue:**
 ```python
-# Test uses old signature (minigrids_path)
+# Test used old signature (minigrids_path)
 analyzer = GeospatialAnalyzer(
     buildings_path="mock.gpkg",
     minigrids_path="mock.gpkg",  # This parameter no longer exists!
     ...
 )
 ```
-The `GeospatialAnalyzer.__init__` signature was updated but tests weren't.
 
-### 2. **Hard-Coded API Key Initialization**
-In `utils/llm_function_caller.py`:
+**Fixed - Now uses correct signature:**
+```python
+analyzer = GeospatialAnalyzer(
+    buildings_path="mock_buildings.gpkg",
+    tile_stats_path="mock_stats.csv",
+    plain_tiles_path="mock_tiles.geojson",
+    candidate_minigrids_path="mock_candidate_minigrids.gpkg",
+    existing_minigrids_path="mock_existing_minigrids.gpkg",
+    existing_grid_path="mock_grid.gpkg",
+    grid_extension_path="mock_grid_extension.gpkg",
+    roads_path="mock_roads.gpkg",
+    villages_path="mock_villages.gpkg",
+    parishes_path="mock_parishes.gpkg",
+    subcounties_path="mock_subcounties.gpkg",
+)
+```
+
+### 2. **~~Hard-Coded API Key Initialization~~ (FIXED)**
+Previously in `utils/llm_function_caller.py`:
+```python
+client = OpenAI()  # Failed if OPENAI_API_KEY not set
+```
+**Fixed:** Now uses thread-safe lazy initialization:
 ```python
-client = OpenAI()  # Fails if OPENAI_API_KEY not set
+def get_openai_client():
+    """Get or create OpenAI client lazily (thread-safe)."""
+    # Only initializes when actually needed
 ```
-This causes import errors even when using Gemini.
 
 ### 3. **Missing Data Handling**
 - Tests require actual data files that aren't in the repo
diff --git a/tests/test_buffer_functionality.py b/tests/test_buffer_functionality.py
index cf9ea23..ad34d49 100644
--- a/tests/test_buffer_functionality.py
+++ b/tests/test_buffer_functionality.py
@@ -71,12 +71,19 @@ def test_buffer_functionality():
         sys.exit(1)
 
     # Test 2: Test through LLM interface (optional)
+    # The LangGraph function caller uses Google Gemini by default (GOOGLE_API_KEY)
+    # but can be configured to use OpenAI (OPENAI_API_KEY)
     print("\n2. Testing through LLM interface...")
     
-    if not os.environ.get("OPENAI_API_KEY") and not os.environ.get("GOOGLE_API_KEY"):
-        print("⚠️  Skipping LLM test - No API key set")
+    has_gemini_key = bool(os.environ.get("GOOGLE_API_KEY"))
+    has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
+    
+    if not has_gemini_key and not has_openai_key:
+        print("⚠️  Skipping LLM test - No API key set (need GOOGLE_API_KEY or OPENAI_API_KEY)")
         print("✅ Core buffer functionality works!")
     else:
+        provider = "Google Gemini" if has_gemini_key else "OpenAI"
+        print(f"   Using {provider} as LLM provider...")
         try:
             from utils.langraph_function_caller import ask_with_functions
             query = f"Create a 500 meter buffer around this point: {test_point.wkt}"
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
index 8c3a949..9b5a22e 100644
--- a/tests/test_utilities.py
+++ b/tests/test_utilities.py
@@ -17,7 +17,13 @@ class TestGeospatialAnalyzerUtilities:
 
     @pytest.fixture
     def mock_minimal_analyzer(self):
-        """Create a minimal analyzer for utility testing."""
+        """Create a minimal analyzer for utility testing.
+        
+        Note: All paths use "mock.*" since the actual file reading is mocked.
+        The mock returns the same empty GeoDataFrame for all calls, which is
+        appropriate for testing the analyzer's methods that don't depend on
+        specific data in each layer.
+        """
         with patch("utils.GeospatialAnalyzer.gpd.read_file") as mock_read_file:
             with patch("pandas.read_csv") as mock_read_csv:
                 # Mock empty GeoDataFrames
@@ -28,18 +34,19 @@ def mock_minimal_analyzer(self):
                 empty_df = pd.DataFrame()
                 mock_read_csv.return_value = empty_df
 
+                # Paths are mocked so the actual values don't matter
                 analyzer = GeospatialAnalyzer(
-                    buildings_path="mock.gpkg",
-                    tile_stats_path="mock.csv",
-                    plain_tiles_path="mock.geojson",
-                    candidate_minigrids_path="mock.gpkg",
-                    existing_minigrids_path="mock.gpkg",
-                    existing_grid_path="mock.gpkg",
-                    grid_extension_path="mock.gpkg",
-                    roads_path="mock.gpkg",
-                    villages_path="mock.gpkg",
-                    parishes_path="mock.gpkg",
-                    subcounties_path="mock.gpkg",
+                    buildings_path="mock_buildings.gpkg",
+                    tile_stats_path="mock_stats.csv",
+                    plain_tiles_path="mock_tiles.geojson",
+                    candidate_minigrids_path="mock_candidate_minigrids.gpkg",
+                    existing_minigrids_path="mock_existing_minigrids.gpkg",
+                    existing_grid_path="mock_grid.gpkg",
+                    grid_extension_path="mock_grid_extension.gpkg",
+                    roads_path="mock_roads.gpkg",
+                    villages_path="mock_villages.gpkg",
+                    parishes_path="mock_parishes.gpkg",
+                    subcounties_path="mock_subcounties.gpkg",
                 )
                 return analyzer
 
diff --git a/utils/llm_function_caller.py b/utils/llm_function_caller.py
index 3ec17e1..66b9cb9 100644
--- a/utils/llm_function_caller.py
+++ b/utils/llm_function_caller.py
@@ -5,6 +5,7 @@
 from shapely.wkt import loads as wkt_loads
 import sys
 import unicodedata
+import threading
 # ── 1) define tools ──────────────────────────────────────────────────────────
 from .tools import tools
 
@@ -13,21 +14,25 @@
 from configs.system_prompt import SYSTEM_PROMPT
 
 
-# Lazy initialization of OpenAI client
+# Thread-safe lazy initialization of OpenAI client
 _client = None
+_client_lock = threading.Lock()
 
 
 def get_openai_client():
-    """Get or create OpenAI client lazily."""
+    """Get or create OpenAI client lazily (thread-safe)."""
     global _client
     if _client is None:
-        api_key = os.environ.get("OPENAI_API_KEY")
-        if not api_key:
-            raise ValueError(
-                "OPENAI_API_KEY environment variable is not set. "
-                "Please set it to use the OpenAI-based function caller."
-            )
-        _client = OpenAI()
+        with _client_lock:
+            # Double-check locking pattern
+            if _client is None:
+                api_key = os.environ.get("OPENAI_API_KEY")
+                if not api_key:
+                    raise ValueError(
+                        "OPENAI_API_KEY environment variable is not set. "
+                        "Please set it to use the OpenAI-based function caller."
+                    )
+                _client = OpenAI()
     return _client