From 69230a39f85a4b63574b71be17ad277bb08ce42f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Dec 2025 01:49:13 +0000 Subject: [PATCH 1/3] Initial plan From 5c945eb81895dbcea541b05a0e0ef141a6672aa2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Dec 2025 02:00:51 +0000 Subject: [PATCH 2/3] Fix broken tests, lazy-load OpenAI client, and add comprehensive project documentation Co-authored-by: yigagilbert <97894246+yigagilbert@users.noreply.github.com> --- docs/PROJECT_ANALYSIS.md | 422 +++++++++++++++++++++++++++++ tests/test_api_buffer.py | 7 + tests/test_buffer_functionality.py | 102 ++++--- tests/test_geospatial_analyzer.py | 66 +++-- tests/test_integration.py | 8 +- tests/test_utilities.py | 76 ++++-- utils/llm_function_caller.py | 19 +- 7 files changed, 602 insertions(+), 98 deletions(-) create mode 100644 docs/PROJECT_ANALYSIS.md diff --git a/docs/PROJECT_ANALYSIS.md b/docs/PROJECT_ANALYSIS.md new file mode 100644 index 0000000..09d77d6 --- /dev/null +++ b/docs/PROJECT_ANALYSIS.md @@ -0,0 +1,422 @@ +# Suntrace GeospatialAnalyzer - Project Analysis + +## Overview + +**Suntrace** is a multimodal geospatial analysis platform for off-grid electrification planning in Uganda. It integrates GIS data, LLM function calling (via OpenAI and Google Gemini), and interactive web visualization using Leaflet maps with a chat interface. + +### Purpose +The platform helps government officials, planners, and development practitioners make informed decisions about rural energy infrastructure, focusing on: +- Solar energy potential assessment +- Mini-grid site evaluation +- Settlement density analysis +- Infrastructure connectivity assessment + +--- + +## Architecture + +### High-Level Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Frontend (templates/index.html) │ +│ ┌─────────────────┐ ┌────────────────────────────────┐│ +│ │ Leaflet Map │ │ Chat Interface ││ +│ │ - Draw regions │ │ - Natural language queries ││ +│ │ - View layers │ │ - Markdown response display ││ +│ └─────────────────┘ └────────────────────────────────┘│ +└──────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ FastAPI Backend (main.py) │ +│ ┌─────────────────┐ ┌────────────────────────────────┐│ +│ │ Map Routes │ │ Query Routes ││ +│ │ /get_map_layers│ │ /query (LLM processing) ││ +│ │ / │ │ /health ││ +│ └─────────────────┘ └────────────────────────────────┘│ +└──────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ GeospatialService Layer │ +│ (app/services/geospatial.py) │ +│ - Coordinates map layer retrieval │ +│ - Processes natural language queries with LLM │ +│ - Converts polygon coordinates to WKT │ +└──────────────────────────────────────────────────────────────────┘ + │ + ┌─────────┴─────────┐ + ▼ ▼ +┌────────────────────────────────┐ ┌────────────────────────────────┐ +│ GeospatialAnalyzer │ │ LangGraph Function Caller │ +│ (utils/GeospatialAnalyzer.py)│ │ (utils/langraph_function_ │ +│ │ │ caller.py) │ +│ - File-based GeoDataFrame ops │ │ - LLM tool calling via │ +│ - GeoPandas for spatial joins │ │ LangGraph/LangChain │ +│ - Shapely geometry processing │ │ - Function tools for spatial │ +│ - NDVI/environmental metrics │ │ analysis │ +└────────────────────────────────┘ └────────────────────────────────┘ + │ │ + │ ▼ + │ ┌────────────────────────────────┐ + │ │ GeospatialAnalyzer2 │ + │ │ (PostGIS-optimized) │ + │ │ - SQL-based spatial queries │ + │ │ - Efficient for scale │ + │ │ - Falls back to GeoPandas │ + └───────────────────────►└────────────────────────────────┘ + │ + ▼ + ┌────────────────────────────────┐ + │ Data Sources │ + │ - GeoJSON/GeoPackage files │ + │ - PostGIS database │ + │ - CSV tile statistics │ + └────────────────────────────────┘ +``` + +--- + +## Core Components + +### 1. GeospatialAnalyzer (File-Based) +**Location:** `utils/GeospatialAnalyzer.py` + +The primary analyzer class that loads and processes geospatial data using GeoPandas. It provides: + +- **Data Loading**: Reads GeoJSON, GeoPackage, and CSV files +- **Spatial Operations**: Intersection, buffering, distance calculations +- **Environmental Metrics**: NDVI, elevation, slope, rainfall analysis +- **Settlement Analysis**: Building counts, village information +- **Infrastructure Analysis**: Roads, grid infrastructure, minigrids + +**Key Methods:** +```python +count_features_within_region(region, layer_name, filter_expr) +analyze_region(region) # Comprehensive region analysis +weighted_tile_stats_all(region) # Environmental metrics +compute_distance_to_grid(geometry) +buffer_geometry(geom, radius_m) +``` + +### 2. GeospatialAnalyzer2 (PostGIS-Optimized) +**Location:** `utils/GeospatialAnalyzer2.py` + +An optimized version that pushes spatial operations to PostGIS when available: + +- **SQL-based Queries**: Uses ST_Intersects, ST_Area, etc. +- **Spatial Indexing**: Creates GiST indexes for performance +- **Lazy Loading**: Caches results for repeated queries +- **Fallback**: Falls back to GeoPandas when DB unavailable + +### 3. Factory Pattern +**Location:** `utils/factory.py` + +Creates the appropriate analyzer based on environment: +```python +def create_geospatial_analyzer(): + if use_postgis: + return GeospatialAnalyzer2(database_uri=uri) + else: + return GeospatialAnalyzer(file_paths...) +``` + +### 4. LLM Integration + +**LangGraph Agent** (`utils/langraph_function_caller.py`): +- Uses LangChain and LangGraph for tool-calling +- Supports Google Gemini and OpenAI models +- Maintains conversation state with InMemorySaver +- Provides tools for spatial analysis + +**Legacy OpenAI Client** (`utils/llm_function_caller.py`): +- Direct OpenAI API integration +- Manual tool dispatch and response handling + +--- + +## Data Layers + +The analyzer supports these geospatial layers: + +| Layer | Description | Geometry Type | +|-------|-------------|---------------| +| `buildings` | Building footprints | Polygon | +| `tiles` | Analysis grid tiles with statistics | Polygon | +| `roads` | Road network | LineString | +| `villages` | Village boundaries | Polygon | +| `parishes` | Parish boundaries | Polygon | +| `subcounties` | Subcounty boundaries | Polygon | +| `existing_grid` | Existing electricity grid | LineString | +| `grid_extension` | Proposed grid extensions | LineString | +| `candidate_minigrids` | Candidate minigrid sites | Polygon/Point | +| `existing_minigrids` | Existing minigrids | Polygon/Point | + +--- + +## Strengths 💪 + +### 1. **Well-Structured Architecture** +- Clear separation of concerns (API, services, core, utilities) +- Factory pattern for flexible analyzer instantiation +- Dependency injection with FastAPI + +### 2. **Dual Backend Support** +- File-based GeoPandas for development/small datasets +- PostGIS optimization for production scale +- Automatic fallback mechanism + +### 3. **LLM Integration** +- Natural language interface for geospatial queries +- Tool-calling with function schemas +- Support for multiple LLM providers (OpenAI, Google Gemini) + +### 4. **Comprehensive Analysis Capabilities** +- Environmental metrics (NDVI, elevation, rainfall) +- Settlement analysis (buildings, villages) +- Infrastructure analysis (roads, grid, minigrids) +- Distance calculations and buffering + +### 5. **Interactive Frontend** +- Leaflet-based map with drawing tools +- Chat interface with markdown rendering +- Layer controls and legends +- Responsive design with Bootstrap + +### 6. **Container-Ready** +- Dockerfile for easy deployment +- Docker Compose support +- Google Cloud Run deployment scripts + +### 7. **Good Documentation** +- Comprehensive README with setup instructions +- Testing guide (TESTING.md) +- Docstrings on major functions + +--- + +## Weaknesses 🔧 + +### 1. **Outdated/Broken Tests** +The test suite has issues: +```python +# Test uses old signature (minigrids_path) +analyzer = GeospatialAnalyzer( + buildings_path="mock.gpkg", + minigrids_path="mock.gpkg", # This parameter no longer exists! + ... +) +``` +The `GeospatialAnalyzer.__init__` signature was updated but tests weren't. + +### 2. **Hard-Coded API Key Initialization** +In `utils/llm_function_caller.py`: +```python +client = OpenAI() # Fails if OPENAI_API_KEY not set +``` +This causes import errors even when using Gemini. + +### 3. **Missing Data Handling** +- Tests require actual data files that aren't in the repo +- No mock data or fixtures for unit testing +- Integration tests skip silently when data is missing + +### 4. **Incomplete Error Handling** +Some methods return empty results silently: +```python +if gdf.empty: + return {} # No indication of why it's empty +``` + +### 5. **Mixed CRS Handling** +- Multiple places handle CRS conversion differently +- Some methods assume EPSG:4326, others don't +- Potential for coordinate system mismatches + +### 6. **Memory Concerns** +- `get_layer_geojson()` can load large datasets into memory +- Building layer sampling helps but may not be sufficient for all layers + +### 7. **Dual LLM Systems** +Two different LLM integration systems exist: +- `llm_function_caller.py` (OpenAI direct) +- `langraph_function_caller.py` (LangGraph/LangChain) + +The app uses LangGraph but the legacy file still exists and causes issues. + +### 8. **PostGIS Dependency Complexity** +- Requires ogr2ogr for data loading +- Complex setup for local development +- Database schema must match expected table names + +--- + +## How It Works + +### User Flow + +1. **User opens web interface** → Loads `templates/index.html` +2. **Map initializes** → Fetches `/get_map_layers` for GeoJSON data +3. **User draws polygon** → Stores coordinates in JavaScript +4. **User asks question** → POST to `/query` with question + polygon +5. **LLM processes query** → LangGraph agent with geospatial tools +6. **Tools execute** → Analyzer performs spatial analysis +7. **Response rendered** → Markdown parsed and displayed + +### Query Processing Flow + +``` +User Query + Polygon WKT + │ + ▼ +┌─────────────────────────┐ +│ GeospatialService │ +│ process_query() │ +│ - Validate polygon │ +│ - Convert to WKT │ +│ - Call LLM │ +└─────────────────────────┘ + │ + ▼ +┌─────────────────────────┐ +│ LangGraph Agent │ +│ ask_with_functions() │ +│ - Parse query │ +│ - Select tools │ +│ - Execute analysis │ +└─────────────────────────┘ + │ + ▼ +┌─────────────────────────┐ +│ GeospatialAnalyzer │ +│ analyze_region() │ +│ count_features() │ +│ weighted_tile_stats() │ +└─────────────────────────┘ + │ + ▼ + Analysis Results + │ + ▼ +┌─────────────────────────┐ +│ LLM Final Response │ +│ - Format results │ +│ - Generate insights │ +│ - Return markdown │ +└─────────────────────────┘ +``` + +--- + +## Configuration + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `OPENAI_API_KEY` | OpenAI API key | Required for OpenAI | +| `GOOGLE_API_KEY` | Google Gemini API key | Required for Gemini | +| `SUNTRACE_USE_POSTGIS` | Enable PostGIS backend | `false` | +| `SUNTRACE_DATABASE_URI` | PostGIS connection string | Local default | +| `LANGSMITH_API_KEY` | LangSmith tracing | Optional | + +### File Paths (configs/paths.py) + +Data files are expected in the `data/` directory: +- Buildings, roads, villages, etc. in `data/viz_geojsons/` +- Tile statistics in `data/Lamwo_Tile_Stats_EE_biomass.csv` +- Grid tiles in `data/lamwo_sentinel_composites/` + +--- + +## Testing + +### Run Tests +```bash +make test # Unit tests (non-slow, non-integration) +make test-all # All tests +make test-integration # Integration tests with data +make test-coverage # With coverage report +``` + +### Current Test Status +- **Passing**: Basic validation, CRS handling, parametrized tests +- **Failing**: Tests using outdated `GeospatialAnalyzer` signature +- **Skipped**: Integration tests (missing data files) + +--- + +## Recommendations + +### Immediate Fixes + +1. **Fix test signatures** - Update tests to use new `GeospatialAnalyzer` parameters +2. **Lazy OpenAI client** - Don't initialize at import time +3. **Add mock data** - Create fixtures for unit tests + +### Short-Term Improvements + +1. **Consolidate LLM integrations** - Use only LangGraph, remove legacy +2. **Add validation layer** - Validate inputs before processing +3. **Improve error messages** - Return specific errors, not empty results + +### Long-Term Enhancements + +1. **Add caching layer** - Cache frequent spatial queries +2. **Streaming responses** - Use LangGraph streaming for real-time updates +3. **User sessions** - Persist conversation state +4. **Multi-region support** - Extend beyond Lamwo district + +--- + +## File Structure Reference + +``` +suntrace/ +├── main.py # FastAPI app entry point +├── app/ +│ ├── api/ +│ │ ├── deps.py # Dependency injection, app factory +│ │ └── routes/ +│ │ ├── map.py # Map layer endpoints +│ │ └── query.py # Query processing endpoints +│ ├── core/ +│ │ ├── config.py # Settings with Pydantic +│ │ ├── exceptions.py # Custom exceptions +│ │ ├── logger.py # Logging setup +│ │ └── middleware.py # Request logging +│ ├── models/ +│ │ └── schemas.py # Pydantic request/response models +│ └── services/ +│ └── geospatial.py # Main service class +├── utils/ +│ ├── GeospatialAnalyzer.py # File-based analyzer +│ ├── GeospatialAnalyzer2.py # PostGIS-optimized analyzer +│ ├── factory.py # Analyzer factory +│ ├── langraph_function_caller.py # LangGraph LLM integration +│ ├── llm_function_caller.py # Legacy OpenAI integration +│ └── tools.py # Tool definitions +├── configs/ +│ ├── paths.py # Data file paths +│ ├── stats.py # Statistics column definitions +│ └── system_prompt.py # LLM system prompt +├── templates/ +│ └── index.html # Frontend with Leaflet + chat +├── tests/ # Test suite +├── data/ # Geospatial data (gitignored) +└── scripts/ # Utility scripts +``` + +--- + +## Conclusion + +Suntrace is a well-designed geospatial analysis platform with solid architecture and comprehensive functionality. The main areas needing attention are: + +1. **Test maintenance** - Update tests to match current API +2. **LLM consolidation** - Remove legacy integration +3. **Error handling** - Improve user feedback +4. **Documentation** - Keep in sync with code changes + +The project demonstrates excellent use of modern Python patterns (dependency injection, factory pattern, Pydantic validation) and provides a powerful natural language interface for complex geospatial analysis. diff --git a/tests/test_api_buffer.py b/tests/test_api_buffer.py index c03049f..8000377 100644 --- a/tests/test_api_buffer.py +++ b/tests/test_api_buffer.py @@ -1,10 +1,16 @@ #!/usr/bin/env python3 """ Simple API test for buffer functionality to verify the Flask endpoints work. + +This is a manual test script that requires a running server. +Run manually with: python tests/test_api_buffer.py + +When run via pytest, it will skip since no server is available. """ import json +import pytest import requests from shapely.geometry import Point from shapely.wkt import dumps as wkt_dumps @@ -12,6 +18,7 @@ BASE_URL = "http://127.0.0.1:5000" +@pytest.mark.skip(reason="Requires running server - run manually with python tests/test_api_buffer.py") def test_api_buffer(): """Test buffer functionality through API endpoints""" print("🌐 Testing Buffer API Endpoints") diff --git a/tests/test_buffer_functionality.py b/tests/test_buffer_functionality.py index b920157..cf9ea23 100644 --- a/tests/test_buffer_functionality.py +++ b/tests/test_buffer_functionality.py @@ -1,25 +1,35 @@ #!/usr/bin/env python3 """ Test script to verify buffer_geometry functionality works through the LLM interface. + +This test requires data files to be present. It will skip if data files are not available. """ import os import sys -sys.path.append(os.path.join(os.path.dirname(__file__))) +import pytest -from shapely.geometry import Point -from utils.factory import create_geospatial_analyzer -from utils.llm_function_caller import ask_with_functions +sys.path.append(os.path.join(os.path.dirname(__file__))) +@pytest.mark.integration def test_buffer_functionality(): """Test the buffer functionality through the LLM interface.""" + from shapely.geometry import Point + from utils.factory import create_geospatial_analyzer print("🧪 Testing buffer_geometry functionality...") - # Create analyzer - analyzer = create_geospatial_analyzer() + # Try to create analyzer - this may fail if data files are missing + try: + analyzer = create_geospatial_analyzer() + except Exception as e: + pytest.skip(f"Could not create analyzer (likely missing data files): {e}") + + # Check if the analyzer has valid data + if analyzer._buildings_gdf.empty: + pytest.skip("No buildings data loaded - skipping test") # Test 1: Test buffer_geometry method directly print("\n1. Testing buffer_geometry method directly...") @@ -27,44 +37,58 @@ def test_buffer_functionality(): try: buffered = analyzer.buffer_geometry(test_point, 1000) # 1km buffer print(f"✅ Direct buffer test successful: {type(buffered)}") + assert buffered is not None + assert hasattr(buffered, 'area') + assert buffered.area > 0 except Exception as e: - print(f"❌ Direct buffer test failed: {e}") - return False + pytest.fail(f"Direct buffer test failed: {e}") - # Test 2: Test through LLM interface - print("\n2. Testing through LLM interface...") + print("✅ Buffer functionality test passed!") - # Note: For this test to work, you need OpenAI API key set - if not os.environ.get("OPENAI_API_KEY"): - print("⚠️ Skipping LLM test - OPENAI_API_KEY not set") - print("✅ Core buffer functionality works!") - return True +if __name__ == "__main__": + # When run directly, call the test function but handle missing data gracefully + from shapely.geometry import Point + + print("🧪 Testing buffer_geometry functionality...") + try: - query = f"Create a 500 meter buffer around this point: {test_point.wkt}" - response = ask_with_functions(query, analyzer) - print(f"✅ LLM buffer test successful: {response[:100]}...") + from utils.factory import create_geospatial_analyzer + analyzer = create_geospatial_analyzer() except Exception as e: - print(f"❌ LLM buffer test failed: {e}") - # Don't fail the test if it's just an API issue - if "openai" in str(e).lower() or "api" in str(e).lower(): - print("✅ Core buffer functionality works! (LLM API issue)") - return True - return False - - print("✅ All buffer tests passed!") - return True - + print(f"⚠️ Could not create analyzer: {e}") + print("✅ Core imports work. Data files may be missing.") + sys.exit(0) + + # Test buffer_geometry method directly + print("\n1. Testing buffer_geometry method directly...") + test_point = Point(32.8, 3.16) # Point in Lamwo, Uganda + try: + buffered = analyzer.buffer_geometry(test_point, 1000) # 1km buffer + print(f"✅ Direct buffer test successful: {type(buffered)}") + except Exception as e: + print(f"❌ Direct buffer test failed: {e}") + sys.exit(1) -if __name__ == "__main__": - success = test_buffer_functionality() - if success: - print("\n🎉 Buffer functionality is ready for frontend integration!") - print("\n📋 Next steps for full end-to-end workflow:") - print(" 1. Frontend feature selection (click on minigrids)") - print(" 2. Buffer visualization on map") - print(" 3. Interactive buffer queries through chat") - print(" 4. Session management for selected features") + # Test 2: Test through LLM interface (optional) + print("\n2. Testing through LLM interface...") + + if not os.environ.get("OPENAI_API_KEY") and not os.environ.get("GOOGLE_API_KEY"): + print("⚠️ Skipping LLM test - No API key set") + print("✅ Core buffer functionality works!") else: - print("\n❌ Buffer functionality needs fixes before frontend integration") - sys.exit(1) + try: + from utils.langraph_function_caller import ask_with_functions + query = f"Create a 500 meter buffer around this point: {test_point.wkt}" + response = ask_with_functions(query, analyzer) + print(f"✅ LLM buffer test successful: {response[:100]}...") + except Exception as e: + print(f"⚠️ LLM buffer test skipped: {e}") + print("✅ Core buffer functionality works! (LLM API issue)") + + print("\n🎉 Buffer functionality is ready for frontend integration!") + print("\n📋 Next steps for full end-to-end workflow:") + print(" 1. Frontend feature selection (click on minigrids)") + print(" 2. Buffer visualization on map") + print(" 3. Interactive buffer queries through chat") + print(" 4. Session management for selected features") diff --git a/tests/test_geospatial_analyzer.py b/tests/test_geospatial_analyzer.py index 34c7f4a..123c7df 100644 --- a/tests/test_geospatial_analyzer.py +++ b/tests/test_geospatial_analyzer.py @@ -1,3 +1,12 @@ +""" +Manual test script for GeospatialAnalyzer (requires data files). + +This script is designed to be run directly, not through pytest. +It requires the sample data files to be present in the data/ directory. + +Usage: + python tests/test_geospatial_analyzer.py +""" import os import sys @@ -6,28 +15,35 @@ # Add the src directory to the Python path sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -from utils.factory import create_geospatial_analyzer - -# Add the project root (one level up) to the Python path for configs -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -from configs.paths import SAMPLE_REGION_PATH - -# Define a sample region for testing (you may need to adjust coordinates) -# These coordinates are arbitrary and likely outside your actual data extent. -# You should use coordinates that are relevant to your data for meaningful tests. -# using shapely open shapefile from path data/sample_region_mudu/5502888.shp -# Load the sample polygon from the shapefile -sample_gdf = gpd.read_file(SAMPLE_REGION_PATH) -if sample_gdf.empty: - raise ValueError( - f"Sample region shapefile at {SAMPLE_REGION_PATH} is empty or failed to load." - ) -sample_polygon = sample_gdf.geometry.iloc[0] +def get_sample_polygon(): + """Load sample polygon from file, return None if file doesn't exist.""" + from configs.paths import SAMPLE_REGION_PATH + + if not os.path.exists(SAMPLE_REGION_PATH): + print(f"Warning: Sample region file not found at {SAMPLE_REGION_PATH}") + return None + + sample_gdf = gpd.read_file(SAMPLE_REGION_PATH) + if sample_gdf.empty: + print(f"Warning: Sample region shapefile at {SAMPLE_REGION_PATH} is empty.") + return None + + return sample_gdf.geometry.iloc[0] def run_tests(): + """Run manual tests for the GeospatialAnalyzer.""" + from utils.factory import create_geospatial_analyzer + + # Load sample polygon + sample_polygon = get_sample_polygon() + if sample_polygon is None: + print("Error: Cannot run tests without sample polygon data.") + print("Please ensure data/sample_region_mudu/mudu_village.gpkg exists.") + return + print("Initializing GeospatialAnalyzer...") try: analyzer = create_geospatial_analyzer() @@ -48,12 +64,12 @@ def run_tests(): else: print(" Buildings GDF is empty or failed to load.") - if not analyzer._minigrids_gdf.empty: + if not analyzer._candidate_minigrids_gdf.empty: print( - f" Minigrids loaded: {len(analyzer._minigrids_gdf)} features. CRS: {analyzer._minigrids_gdf.crs}" + f" Candidate Minigrids loaded: {len(analyzer._candidate_minigrids_gdf)} features. CRS: {analyzer._candidate_minigrids_gdf.crs}" ) else: - print(" Minigrids GDF is empty or failed to load.") + print(" Candidate Minigrids GDF is empty or failed to load.") if not analyzer._plain_tiles_gdf.empty: print( @@ -180,15 +196,15 @@ def run_tests(): except Exception as e: print(f" Error counting buildings: {e}") - # 2. Count minigrids - print("\n2. count_features_within (minigrids):") + # 2. Count candidate minigrids + print("\n2. count_features_within (candidate_minigrids):") try: minigrid_count = analyzer.count_features_within_region( - test_region_polygon, "minigrids" + test_region_polygon, "candidate_minigrids" ) - print(f" Number of minigrids in sample region: {minigrid_count}") + print(f" Number of candidate minigrids in sample region: {minigrid_count}") except Exception as e: - print(f" Error counting minigrids: {e}") + print(f" Error counting candidate minigrids: {e}") # 3. Count tiles print("\n3. count_features_within (tiles):") diff --git a/tests/test_integration.py b/tests/test_integration.py index 2b86ec4..55cd946 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -199,14 +199,14 @@ def test_coordinate_system_consistency(self, analyzer_with_data): target_crs = "EPSG:4326" # WGS84 if hasattr(analyzer_with_data, '_buildings_gdf') and not analyzer_with_data._buildings_gdf.empty: - buildings_4326 = analyzer_with_data._ensure_gdf_crs_for_calculation( + buildings_4326 = analyzer_with_data._check_and_reproject_gdf( analyzer_with_data._buildings_gdf.copy(), target_crs ) assert buildings_4326.crs.to_string() == target_crs - if hasattr(analyzer_with_data, '_minigrids_gdf') and not analyzer_with_data._minigrids_gdf.empty: - minigrids_4326 = analyzer_with_data._ensure_gdf_crs_for_calculation( - analyzer_with_data._minigrids_gdf.copy(), target_crs + if hasattr(analyzer_with_data, '_candidate_minigrids_gdf') and not analyzer_with_data._candidate_minigrids_gdf.empty: + minigrids_4326 = analyzer_with_data._check_and_reproject_gdf( + analyzer_with_data._candidate_minigrids_gdf.copy(), target_crs ) assert minigrids_4326.crs.to_string() == target_crs diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 0f541fe..8c3a949 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -30,9 +30,16 @@ def mock_minimal_analyzer(self): analyzer = GeospatialAnalyzer( buildings_path="mock.gpkg", - minigrids_path="mock.gpkg", tile_stats_path="mock.csv", plain_tiles_path="mock.geojson", + candidate_minigrids_path="mock.gpkg", + existing_minigrids_path="mock.gpkg", + existing_grid_path="mock.gpkg", + grid_extension_path="mock.gpkg", + roads_path="mock.gpkg", + villages_path="mock.gpkg", + parishes_path="mock.gpkg", + subcounties_path="mock.gpkg", ) return analyzer @@ -41,13 +48,12 @@ def test_buffer_geometry_with_geographic_crs(self, mock_minimal_analyzer): # Create a point in geographic coordinates point = Point(-0.5, 0.5) # Lon, Lat - # Mock the geometry to have geographic CRS - with patch.object(point, "crs", "EPSG:4326"): - buffered = mock_minimal_analyzer.buffer_geometry(point, 1000) # 1km buffer + # The buffer_geometry method handles CRS conversion internally + buffered = mock_minimal_analyzer.buffer_geometry(point, 1000) # 1km buffer - assert buffered is not None - assert hasattr(buffered, "area") - assert buffered.area > 0 + assert buffered is not None + assert hasattr(buffered, "area") + assert buffered.area > 0 def test_buffer_geometry_without_crs(self, mock_minimal_analyzer): """Test buffering without CRS information.""" @@ -59,8 +65,8 @@ def test_buffer_geometry_without_crs(self, mock_minimal_analyzer): assert hasattr(buffered, "area") assert buffered.area > 0 - def test_ensure_gdf_crs_for_calculation(self, mock_minimal_analyzer): - """Test CRS ensuring for GeoDataFrames.""" + def test_check_and_reproject_gdf(self, mock_minimal_analyzer): + """Test CRS reprojection for GeoDataFrames.""" # Create test GeoDataFrame test_gdf = gpd.GeoDataFrame( {"geometry": [Point(0, 0), Point(1, 1)]}, crs="EPSG:4326" @@ -68,39 +74,37 @@ def test_ensure_gdf_crs_for_calculation(self, mock_minimal_analyzer): target_crs = "EPSG:32636" # UTM Zone 36N - result_gdf = mock_minimal_analyzer._ensure_gdf_crs_for_calculation( + result_gdf = mock_minimal_analyzer._check_and_reproject_gdf( test_gdf, target_crs ) assert result_gdf.crs.to_string() == target_crs assert len(result_gdf) == len(test_gdf) - def test_ensure_crs_for_calculation_same_crs(self, mock_minimal_analyzer): - """Test CRS ensuring when geometry is already in target CRS.""" - # Create geometry that's already in target CRS - target_crs = "EPSG:32636" - test_geom = gpd.GeoSeries([Point(0, 0)], crs=target_crs).iloc[0] + def test_prepare_geometry_for_crs_same_crs(self, mock_minimal_analyzer): + """Test geometry preparation when in same CRS.""" + # Create geometry - prepare_geometry_for_crs assumes input is in geographic CRS + source_geom = Point(0, 0) + target_crs = mock_minimal_analyzer.target_geographic_crs - result_geom, was_reprojected = ( - mock_minimal_analyzer._ensure_crs_for_calculation(test_geom, target_crs) + result_series, was_reprojected = ( + mock_minimal_analyzer._prepare_geometry_for_crs(source_geom, target_crs) ) assert not was_reprojected - assert result_geom is not None + assert result_series is not None - def test_ensure_crs_for_calculation_different_crs(self, mock_minimal_analyzer): - """Test CRS ensuring when geometry needs reprojection.""" - source_crs = "EPSG:4326" - target_crs = "EPSG:32636" - - test_geom = gpd.GeoSeries([Point(0, 0)], crs=source_crs).iloc[0] + def test_prepare_geometry_for_crs_different_crs(self, mock_minimal_analyzer): + """Test geometry preparation when reprojection is needed.""" + source_geom = Point(0, 0) + target_crs = "EPSG:32636" # UTM Zone 36N - result_geom, was_reprojected = ( - mock_minimal_analyzer._ensure_crs_for_calculation(test_geom, target_crs) + result_series, was_reprojected = ( + mock_minimal_analyzer._prepare_geometry_for_crs(source_geom, target_crs) ) assert was_reprojected - assert result_geom is not None + assert result_series is not None class TestGeospatialAnalyzerErrorHandling: @@ -130,9 +134,16 @@ def test_count_features_empty_gdf(self): analyzer = GeospatialAnalyzer( buildings_path="mock.gpkg", - minigrids_path="mock.gpkg", tile_stats_path="mock.csv", plain_tiles_path="mock.geojson", + candidate_minigrids_path="mock.gpkg", + existing_minigrids_path="mock.gpkg", + existing_grid_path="mock.gpkg", + grid_extension_path="mock.gpkg", + roads_path="mock.gpkg", + villages_path="mock.gpkg", + parishes_path="mock.gpkg", + subcounties_path="mock.gpkg", ) test_region = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) @@ -156,9 +167,16 @@ def test_invalid_filter_expression(self): analyzer = GeospatialAnalyzer( buildings_path="mock.gpkg", - minigrids_path="mock.gpkg", tile_stats_path="mock.csv", plain_tiles_path="mock.geojson", + candidate_minigrids_path="mock.gpkg", + existing_minigrids_path="mock.gpkg", + existing_grid_path="mock.gpkg", + grid_extension_path="mock.gpkg", + roads_path="mock.gpkg", + villages_path="mock.gpkg", + parishes_path="mock.gpkg", + subcounties_path="mock.gpkg", ) test_region = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]) diff --git a/utils/llm_function_caller.py b/utils/llm_function_caller.py index 3964a20..3ec17e1 100644 --- a/utils/llm_function_caller.py +++ b/utils/llm_function_caller.py @@ -13,8 +13,22 @@ from configs.system_prompt import SYSTEM_PROMPT +# Lazy initialization of OpenAI client +_client = None -client = OpenAI() + +def get_openai_client(): + """Get or create OpenAI client lazily.""" + global _client + if _client is None: + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + raise ValueError( + "OPENAI_API_KEY environment variable is not set. " + "Please set it to use the OpenAI-based function caller." + ) + _client = OpenAI() + return _client # ── 3) dispatcher ─────────────────────────────────────────────────────────── @@ -80,6 +94,9 @@ def ask_with_functions(user_prompt, analyzer=None): #print("Debug: Making first API call...") + # Get the OpenAI client lazily + client = get_openai_client() + # First call: let the model decide if it needs functions response = client.chat.completions.create( model="gpt-4o-mini", From 0e40a4318414ffccfb8bff3517d3930e104aa267 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Dec 2025 02:04:10 +0000 Subject: [PATCH 3/3] Address code review feedback: thread-safe OpenAI client, clearer mock paths, improved documentation Co-authored-by: yigagilbert <97894246+yigagilbert@users.noreply.github.com> --- docs/PROJECT_ANALYSIS.md | 39 ++++++++++++++++++++++++------ tests/test_buffer_functionality.py | 11 +++++++-- tests/test_utilities.py | 31 +++++++++++++++--------- utils/llm_function_caller.py | 23 +++++++++++------- 4 files changed, 73 insertions(+), 31 deletions(-) diff --git a/docs/PROJECT_ANALYSIS.md b/docs/PROJECT_ANALYSIS.md index 09d77d6..cc20f05 100644 --- a/docs/PROJECT_ANALYSIS.md +++ b/docs/PROJECT_ANALYSIS.md @@ -198,24 +198,47 @@ The analyzer supports these geospatial layers: ## Weaknesses 🔧 -### 1. **Outdated/Broken Tests** -The test suite has issues: +### 1. **~~Outdated/Broken Tests~~ (FIXED)** +The test suite previously had issues with outdated constructor signatures. + +**Original Issue:** ```python -# Test uses old signature (minigrids_path) +# Test used old signature (minigrids_path) analyzer = GeospatialAnalyzer( buildings_path="mock.gpkg", minigrids_path="mock.gpkg", # This parameter no longer exists! ... ) ``` -The `GeospatialAnalyzer.__init__` signature was updated but tests weren't. -### 2. **Hard-Coded API Key Initialization** -In `utils/llm_function_caller.py`: +**Fixed - Now uses correct signature:** +```python +analyzer = GeospatialAnalyzer( + buildings_path="mock_buildings.gpkg", + tile_stats_path="mock_stats.csv", + plain_tiles_path="mock_tiles.geojson", + candidate_minigrids_path="mock_candidate_minigrids.gpkg", + existing_minigrids_path="mock_existing_minigrids.gpkg", + existing_grid_path="mock_grid.gpkg", + grid_extension_path="mock_grid_extension.gpkg", + roads_path="mock_roads.gpkg", + villages_path="mock_villages.gpkg", + parishes_path="mock_parishes.gpkg", + subcounties_path="mock_subcounties.gpkg", +) +``` + +### 2. **~~Hard-Coded API Key Initialization~~ (FIXED)** +Previously in `utils/llm_function_caller.py`: +```python +client = OpenAI() # Failed if OPENAI_API_KEY not set +``` +**Fixed:** Now uses thread-safe lazy initialization: ```python -client = OpenAI() # Fails if OPENAI_API_KEY not set +def get_openai_client(): + """Get or create OpenAI client lazily (thread-safe).""" + # Only initializes when actually needed ``` -This causes import errors even when using Gemini. ### 3. **Missing Data Handling** - Tests require actual data files that aren't in the repo diff --git a/tests/test_buffer_functionality.py b/tests/test_buffer_functionality.py index cf9ea23..ad34d49 100644 --- a/tests/test_buffer_functionality.py +++ b/tests/test_buffer_functionality.py @@ -71,12 +71,19 @@ def test_buffer_functionality(): sys.exit(1) # Test 2: Test through LLM interface (optional) + # The LangGraph function caller uses Google Gemini by default (GOOGLE_API_KEY) + # but can be configured to use OpenAI (OPENAI_API_KEY) print("\n2. Testing through LLM interface...") - if not os.environ.get("OPENAI_API_KEY") and not os.environ.get("GOOGLE_API_KEY"): - print("⚠️ Skipping LLM test - No API key set") + has_gemini_key = bool(os.environ.get("GOOGLE_API_KEY")) + has_openai_key = bool(os.environ.get("OPENAI_API_KEY")) + + if not has_gemini_key and not has_openai_key: + print("⚠️ Skipping LLM test - No API key set (need GOOGLE_API_KEY or OPENAI_API_KEY)") print("✅ Core buffer functionality works!") else: + provider = "Google Gemini" if has_gemini_key else "OpenAI" + print(f" Using {provider} as LLM provider...") try: from utils.langraph_function_caller import ask_with_functions query = f"Create a 500 meter buffer around this point: {test_point.wkt}" diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 8c3a949..9b5a22e 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -17,7 +17,13 @@ class TestGeospatialAnalyzerUtilities: @pytest.fixture def mock_minimal_analyzer(self): - """Create a minimal analyzer for utility testing.""" + """Create a minimal analyzer for utility testing. + + Note: All paths use "mock.*" since the actual file reading is mocked. + The mock returns the same empty GeoDataFrame for all calls, which is + appropriate for testing the analyzer's methods that don't depend on + specific data in each layer. + """ with patch("utils.GeospatialAnalyzer.gpd.read_file") as mock_read_file: with patch("pandas.read_csv") as mock_read_csv: # Mock empty GeoDataFrames @@ -28,18 +34,19 @@ def mock_minimal_analyzer(self): empty_df = pd.DataFrame() mock_read_csv.return_value = empty_df + # Paths are mocked so the actual values don't matter analyzer = GeospatialAnalyzer( - buildings_path="mock.gpkg", - tile_stats_path="mock.csv", - plain_tiles_path="mock.geojson", - candidate_minigrids_path="mock.gpkg", - existing_minigrids_path="mock.gpkg", - existing_grid_path="mock.gpkg", - grid_extension_path="mock.gpkg", - roads_path="mock.gpkg", - villages_path="mock.gpkg", - parishes_path="mock.gpkg", - subcounties_path="mock.gpkg", + buildings_path="mock_buildings.gpkg", + tile_stats_path="mock_stats.csv", + plain_tiles_path="mock_tiles.geojson", + candidate_minigrids_path="mock_candidate_minigrids.gpkg", + existing_minigrids_path="mock_existing_minigrids.gpkg", + existing_grid_path="mock_grid.gpkg", + grid_extension_path="mock_grid_extension.gpkg", + roads_path="mock_roads.gpkg", + villages_path="mock_villages.gpkg", + parishes_path="mock_parishes.gpkg", + subcounties_path="mock_subcounties.gpkg", ) return analyzer diff --git a/utils/llm_function_caller.py b/utils/llm_function_caller.py index 3ec17e1..66b9cb9 100644 --- a/utils/llm_function_caller.py +++ b/utils/llm_function_caller.py @@ -5,6 +5,7 @@ from shapely.wkt import loads as wkt_loads import sys import unicodedata +import threading # ── 1) define tools ────────────────────────────────────────────────────────── from .tools import tools @@ -13,21 +14,25 @@ from configs.system_prompt import SYSTEM_PROMPT -# Lazy initialization of OpenAI client +# Thread-safe lazy initialization of OpenAI client _client = None +_client_lock = threading.Lock() def get_openai_client(): - """Get or create OpenAI client lazily.""" + """Get or create OpenAI client lazily (thread-safe).""" global _client if _client is None: - api_key = os.environ.get("OPENAI_API_KEY") - if not api_key: - raise ValueError( - "OPENAI_API_KEY environment variable is not set. " - "Please set it to use the OpenAI-based function caller." - ) - _client = OpenAI() + with _client_lock: + # Double-check locking pattern + if _client is None: + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + raise ValueError( + "OPENAI_API_KEY environment variable is not set. " + "Please set it to use the OpenAI-based function caller." + ) + _client = OpenAI() return _client