diff --git a/.gitignore b/.gitignore
index 0816e08..ecf3a5e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,13 @@ lib/
 lib64/
 parts/
 sdist/
+
+# Profiling results
+profiling_*results/
+profile_*.txt
+profile_*.json
+profile_*.csv
+performance_report_*.txt
 var/
 wheels/
 share/python-wheels/
diff --git a/docs/profiling.md b/docs/profiling.md
new file mode 100644
index 0000000..eff1520
--- /dev/null
+++ b/docs/profiling.md
@@ -0,0 +1,228 @@
+# Performance Profiling for libCacheSim
+
+This document explains how to use the profiling utilities in libCacheSim to analyze performance bottlenecks in cache simulations, particularly plugin caches and trace processing operations.
+
+## Overview
+
+The `libcachesim.profiler` module provides comprehensive profiling capabilities using Python's built-in `cProfile` along with memory and CPU monitoring. This is particularly useful for:
+
+- Analyzing plugin cache performance bottlenecks
+- Comparing different cache implementations
+- Understanding where time is spent in trace processing
+- Identifying memory usage patterns
+- Getting detailed function-level performance data
+
+## Quick Start
+
+### Basic Function Profiling
+
+```python
+from libcachesim.profiler import profile_cache_operations, profile_trace_processing
+
+# Profile cache operations (individual requests)
+result = profile_cache_operations(cache, reader, num_requests=1000)
+print(f"Time: {result.execution_time:.4f}s, Memory: {result.memory_peak:.2f}MB")
+
+# Profile complete trace processing
+result = profile_trace_processing(cache, reader)
+print(f"Miss ratio: {result.custom_metrics['obj_miss_ratio']:.4f}")
+```
+
+### Advanced Profiling with CacheSimProfiler
+
+```python
+from libcachesim.profiler import CacheSimProfiler
+
+# Create profiler instance
+profiler = CacheSimProfiler("profiling_results")
+
+# Profile specific operations
+result = profiler.profile_plugin_cache_operations(
+    cache, reader, num_requests=1000, method_name="my_plugin_cache"
+)
+
+# Generate comprehensive reports
+report_file = profiler.generate_performance_report()
+json_file = profiler.export_results_json()
+csv_file = profiler.export_results_csv()
+```
+
+### Context Manager Profiling
+
+```python
+from libcachesim.profiler import CacheSimProfiler
+
+profiler = CacheSimProfiler()
+
+with profiler.profile_context("custom_operation") as profile_result:
+    # Your code to profile here
+    for request in reader:
+        cache.get(request)
+        if some_condition:
+            break
+
+    # Add custom metrics
+    profile_result.custom_metrics['requests_processed'] = request_count
+```
+
+## Understanding the Results
+
+### ProfileResult Object
+
+Each profiling operation returns a `ProfileResult` object with:
+
+- `execution_time`: Total execution time in seconds
+- `memory_peak`: Peak memory usage during execution (MB)
+- `memory_current`: Memory change from start to end (MB)
+- `cpu_percent`: CPU usage percentage
+- `custom_metrics`: Dictionary of custom metrics
+- `profile_stats`: Detailed cProfile statistics
+
+### Generated Reports
+
+The profiler generates several types of output:
+
+1. **Text Reports** (`profile_*.txt`): Detailed cProfile output showing:
+   - Top functions by cumulative time
+   - Top functions by total time
+   - Function call counts and timing
+
+2. **Performance Reports** (`performance_report_*.txt`): Summary comparing all profiled methods
+
+3. **JSON Export** (`profile_results_*.json`): Machine-readable results for further analysis
+
+4. **CSV Export** (`profile_summary_*.csv`): Spreadsheet-compatible summary data
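+
+The JSON export is the most convenient format to post-process. Below is a minimal sketch of loading the most recent export, assuming the default `profiling_results` output directory and the schema written by `export_results_json` (and that at least one export exists):
+
+```python
+import glob
+import json
+
+# Pick the most recent JSON export (timestamped filenames sort correctly)
+latest = max(glob.glob("profiling_results/profile_results_*.json"))
+with open(latest) as f:
+    data = json.load(f)
+
+# Each entry mirrors the ProfileResult fields
+for run in data["results"]:
+    print(f"{run['method_name']}: {run['execution_time']:.4f}s, "
+          f"peak {run['memory_peak']:.2f} MB")
+```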
+
+### Example cProfile Output
+
+```
+Top Functions by Cumulative Time:
+----------------------------------------
+         11007 function calls in 0.285 seconds
+
+   Ordered by: cumulative time
+
+   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+        1    0.002    0.002    0.285    0.285 profiler.py:194(run_cache_operations)
+     1000    0.003    0.000    0.279    0.000 plugin_cache.py:118(get)
+     2000    0.273    0.000    0.273    0.000 {built-in method time.sleep}
+     1000    0.002    0.000    0.164    0.000 lru_cache.py:253(cache_hit)
+```
+
+This shows that `time.sleep` calls account for 0.273 of the 0.285 total seconds (96% of the execution time).
+
+## Plugin Cache Performance Analysis
+
+Plugin caches can have performance issues due to Python callback overhead. The profiler helps identify:
+
+### Common Bottlenecks
+
+1. **Python Callback Overhead**: Each cache operation calls Python functions (see the sketch after this list)
+2. **Slow Hook Functions**: User-defined cache hooks that are inefficient
+3. **Memory Allocation**: Excessive object creation in Python
+4. **Data Structure Operations**: Inefficient cache data structure operations
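+
+To put a number on bottleneck 1, you can measure the fixed cost of a single no-op Python call and project it over a trace. A minimal sketch (the function name and request counts are illustrative; results are machine-dependent):
+
+```python
+import time
+
+def noop_hook(cache_data, request):
+    pass
+
+# Measure the average cost of one no-op callback
+n = 1_000_000
+start = time.perf_counter()
+for _ in range(n):
+    noop_hook(None, None)
+per_call = (time.perf_counter() - start) / n
+
+print(f"~{per_call * 1e9:.0f} ns per hook call")
+print(f"~{per_call * 10_000_000:.2f} s added over a 10M-request trace")
+```
+
+Even a no-op callback costs on the order of tens of nanoseconds per invocation, and a plugin cache fires several hooks per request, so this overhead compounds quickly on large traces.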
+
+### Example Analysis
+
+```python
+# Compare plugin cache vs native implementation
+profiler = CacheSimProfiler()
+
+# Profile native LRU
+native_cache = LRU(cache_size=1024)
+result1 = profiler.profile_trace_processing(native_cache, reader, "native_lru")
+
+# Profile plugin LRU
+plugin_cache = PluginCache(
+    cache_size=1024,
+    cache_init_hook=init_hook,
+    cache_hit_hook=hit_hook,
+    cache_miss_hook=miss_hook,
+    # ... other hooks
+)
+result2 = profiler.profile_trace_processing(plugin_cache, reader, "plugin_lru")
+
+# Compare results
+comparison = profiler.compare_results()
+print(f"Plugin overhead: {comparison['performance_ratio']:.2f}x slower")
+```
+
+## Best Practices
+
+### For Plugin Cache Development
+
+1. **Profile Early**: Use profiling during plugin development to catch performance issues
+2. **Minimize Hook Complexity**: Keep cache hook functions as simple as possible (see the sketch after this list)
+3. **Avoid Python Callbacks in Hot Paths**: Consider a C++ implementation for critical operations
+4. **Use Efficient Data Structures**: Profile different Python data structures for cache storage
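+
+As a concrete illustration of point 2, compare a hook that does a single O(1) operation with one that scans the whole cache on every hit. This is a sketch, assuming `cache_data` is the `OrderedDict` itself as in this repo's examples; the hook names are illustrative:
+
+```python
+from collections import OrderedDict
+
+# Lean: one O(1) OrderedDict operation per hit
+def lean_hit_hook(cache_data, request):
+    cache_data.move_to_end(request.obj_id)
+
+# Heavy: copies every key on every hit -- O(n) work in the hot path
+def heavy_hit_hook(cache_data, request):
+    keys = list(cache_data.keys())      # allocates a fresh list per request
+    keys.index(request.obj_id)          # linear scan, result unused
+```
+
+Profiling both variants with `profile_plugin_cache_operations` makes the difference obvious: the heavy hook shows up at the top of the tottime column with one call per request.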
+""" + +import sys +import os +from pathlib import Path +import time +from collections import OrderedDict + +# Import profiler directly +sys.path.insert(0, str(Path(__file__).parent.parent / "libcachesim")) +from profiler import CacheSimProfiler, profile_cache_operations, profile_trace_processing + + +# Simulate libcachesim imports (these would normally come from the built package) +class MockRequest: + """Mock request class for testing.""" + def __init__(self, obj_id, obj_size=1, valid=True): + self.obj_id = obj_id + self.obj_size = obj_size + self.valid = valid + + +class MockReader: + """Mock trace reader for testing.""" + def __init__(self, num_requests=10000, num_objects=1000): + self.num_requests = num_requests + self.num_objects = num_objects + self.current_req = 0 + + def reset(self): + self.current_req = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.current_req >= self.num_requests: + raise StopIteration + + # Simple zipf-like distribution + obj_id = hash(str(self.current_req)) % self.num_objects + req = MockRequest(obj_id, obj_size=100) + self.current_req += 1 + return req + + +class MockCommonCacheParams: + """Mock common cache parameters.""" + def __init__(self, cache_size): + self.cache_size = cache_size + + +# Plugin cache implementation - simulates real libcachesim.PluginCache +class StandaloneLRU: + """Standalone LRU implementation for plugin cache example.""" + def __init__(self, cache_size): + self.cache_data = OrderedDict() + self.cache_size = cache_size + + def cache_hit(self, obj_id): + if obj_id in self.cache_data: + obj_size = self.cache_data.pop(obj_id) + self.cache_data[obj_id] = obj_size + return True + return False + + def cache_miss(self, obj_id, obj_size): + # Add to cache, evict if necessary + while len(self.cache_data) >= self.cache_size: + self.cache_data.popitem(last=False) # Remove oldest + self.cache_data[obj_id] = obj_size + + def cache_eviction(self): + if self.cache_data: + evicted_id, _ = self.cache_data.popitem(last=False) + return evicted_id + return None + + def cache_remove(self, obj_id): + if obj_id in self.cache_data: + del self.cache_data[obj_id] + + def get_size(self): + return len(self.cache_data) + + +class MockPluginCache: + """Mock plugin cache that simulates the real PluginCache with Python hooks.""" + + def __init__(self, cache_size, cache_init_hook, cache_hit_hook, cache_miss_hook, + cache_eviction_hook, cache_remove_hook, cache_free_hook=None, cache_name="MockPlugin"): + self.cache_size = cache_size + self.cache_name = cache_name + + # Initialize hooks + self.cache_hit_hook = cache_hit_hook + self.cache_miss_hook = cache_miss_hook + self.cache_eviction_hook = cache_eviction_hook + self.cache_remove_hook = cache_remove_hook + self.cache_free_hook = cache_free_hook + + # Initialize cache data using init hook + common_params = MockCommonCacheParams(cache_size) + self.cache_data = cache_init_hook(common_params) + + # Statistics + self.n_hits = 0 + self.n_misses = 0 + + def get(self, request): + """Process a cache request.""" + # Check if it's a hit + if self.cache_data.cache_hit(request.obj_id): + self.cache_hit_hook(self.cache_data, request) + self.n_hits += 1 + return True + else: + # Cache miss + self.cache_miss_hook(self.cache_data, request) + self.n_misses += 1 + + # Check if eviction is needed + if self.cache_data.get_size() >= self.cache_size: + evicted = self.cache_eviction_hook(self.cache_data, request) + + return False + + def process_trace(self, reader): + """Process entire trace (simulated).""" + 
+
+## Integration with libCacheSim
+
+When the full libCacheSim package is built, the profiler integrates seamlessly:
+
+```python
+import libcachesim as lcs
+from libcachesim.profiler import CacheSimProfiler
+
+# Create actual cache and reader
+cache = lcs.S3FIFO(cache_size=1024*1024)
+reader = lcs.TraceReader(trace="path/to/trace", trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE)
+
+# Profile real operations
+profiler = CacheSimProfiler()
+result = profiler.profile_trace_processing(cache, reader)
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **ModuleNotFoundError for psutil**: Install it with `pip install psutil`
+2. **Empty cProfile output**: Ensure the profiled code actually runs and takes measurable time
+3. **High memory usage**: Large traces or cache sizes can use significant memory during profiling
+
+### Performance Tips
+
+- For large traces, use the `num_requests` parameter to profile a subset of requests
+- Clear profiler results with `profiler.clear_results()` between different test runs
+- Use the context manager for fine-grained profiling of specific code sections
+
+This profiling system provides the cProfile results requested in the issue and enables comprehensive performance analysis of plugin cache and trace processing operations.
\ No newline at end of file
diff --git a/examples/profiling_analysis.py b/examples/profiling_analysis.py
new file mode 100644
index 0000000..f691a87
--- /dev/null
+++ b/examples/profiling_analysis.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python3
+"""
+Example demonstrating profiling analysis for plugin cache and trace processing.
+
+This script shows how to use the profiling utilities to analyze performance
+bottlenecks in plugin cache implementations and trace processing operations.
+
+This is a standalone example that works without the compiled libcachesim package.
+"""
+
+import sys
+import os
+from pathlib import Path
+import time
+from collections import OrderedDict
+
+# Import profiler directly
+sys.path.insert(0, str(Path(__file__).parent.parent / "libcachesim"))
+from profiler import CacheSimProfiler, profile_cache_operations, profile_trace_processing
+
+
+# Simulate libcachesim imports (these would normally come from the built package)
+class MockRequest:
+    """Mock request class for testing."""
+    def __init__(self, obj_id, obj_size=1, valid=True):
+        self.obj_id = obj_id
+        self.obj_size = obj_size
+        self.valid = valid
+
+
+class MockReader:
+    """Mock trace reader for testing."""
+    def __init__(self, num_requests=10000, num_objects=1000):
+        self.num_requests = num_requests
+        self.num_objects = num_objects
+        self.current_req = 0
+
+    def reset(self):
+        self.current_req = 0
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.current_req >= self.num_requests:
+            raise StopIteration
+
+        # Deterministic pseudo-random object popularity (roughly uniform)
+        obj_id = hash(str(self.current_req)) % self.num_objects
+        req = MockRequest(obj_id, obj_size=100)
+        self.current_req += 1
+        return req
+
+
+class MockCommonCacheParams:
+    """Mock common cache parameters."""
+    def __init__(self, cache_size):
+        self.cache_size = cache_size
+
+
+# Plugin cache implementation - simulates real libcachesim.PluginCache
+class StandaloneLRU:
+    """Standalone LRU implementation for plugin cache example."""
+    def __init__(self, cache_size):
+        self.cache_data = OrderedDict()
+        self.cache_size = cache_size
+
+    def cache_hit(self, obj_id):
+        if obj_id in self.cache_data:
+            obj_size = self.cache_data.pop(obj_id)
+            self.cache_data[obj_id] = obj_size
+            return True
+        return False
+
+    def cache_miss(self, obj_id, obj_size):
+        # Add to cache, evict if necessary
+        while len(self.cache_data) >= self.cache_size:
+            self.cache_data.popitem(last=False)  # Remove oldest
+        self.cache_data[obj_id] = obj_size
+
+    def cache_eviction(self):
+        if self.cache_data:
+            evicted_id, _ = self.cache_data.popitem(last=False)
+            return evicted_id
+        return None
+
+    def cache_remove(self, obj_id):
+        if obj_id in self.cache_data:
+            del self.cache_data[obj_id]
+
+    def get_size(self):
+        return len(self.cache_data)
+
+
+class MockPluginCache:
+    """Mock plugin cache that simulates the real PluginCache with Python hooks."""
+
+    def __init__(self, cache_size, cache_init_hook, cache_hit_hook, cache_miss_hook,
+                 cache_eviction_hook, cache_remove_hook, cache_free_hook=None, cache_name="MockPlugin"):
+        self.cache_size = cache_size
+        self.cache_name = cache_name
+
+        # Initialize hooks
+        self.cache_hit_hook = cache_hit_hook
+        self.cache_miss_hook = cache_miss_hook
+        self.cache_eviction_hook = cache_eviction_hook
+        self.cache_remove_hook = cache_remove_hook
+        self.cache_free_hook = cache_free_hook
+
+        # Initialize cache data using init hook
+        common_params = MockCommonCacheParams(cache_size)
+        self.cache_data = cache_init_hook(common_params)
+
+        # Statistics
+        self.n_hits = 0
+        self.n_misses = 0
+
+    def get(self, request):
+        """Process a cache request."""
+        # Check if it's a hit
+        if self.cache_data.cache_hit(request.obj_id):
+            self.cache_hit_hook(self.cache_data, request)
+            self.n_hits += 1
+            return True
+        else:
+            # Cache miss: run the miss hook, evict if the cache is full,
+            # then insert the new object (without the insertion every
+            # request would miss and the statistics would be meaningless)
+            self.cache_miss_hook(self.cache_data, request)
+            self.n_misses += 1
+
+            if self.cache_data.get_size() >= self.cache_size:
+                self.cache_eviction_hook(self.cache_data, request)
+            self.cache_data.cache_miss(request.obj_id, request.obj_size)
+
+            return False
+
+    def process_trace(self, reader):
+        """Process entire trace (simulated)."""
+        reader.reset()
+        n_hit = 0
+        n_total = 0
+        bytes_hit = 0
+        bytes_total = 0
+
+        for request in reader:
+            n_total += 1
+            bytes_total += request.obj_size
+
+            if self.get(request):
+                n_hit += 1
+                bytes_hit += request.obj_size
+
+        obj_miss_ratio = 1.0 - (n_hit / n_total) if n_total > 0 else 0.0
+        byte_miss_ratio = 1.0 - (bytes_hit / bytes_total) if bytes_total > 0 else 0.0
+
+        return obj_miss_ratio, byte_miss_ratio
+
+
+class MockLRUCache:
+    """Simple LRU cache for comparison."""
+
+    def __init__(self, cache_size):
+        self.cache_size = cache_size
+        self.cache_data = OrderedDict()
+        self.n_hits = 0
+        self.n_misses = 0
+
+    def get(self, request):
+        """Process a cache request."""
+        if request.obj_id in self.cache_data:
+            # Hit - move to end
+            self.cache_data.move_to_end(request.obj_id)
+            self.n_hits += 1
+            return True
+        else:
+            # Miss - add to cache
+            if len(self.cache_data) >= self.cache_size:
+                self.cache_data.popitem(last=False)  # Remove oldest
+
+            self.cache_data[request.obj_id] = request.obj_size
+            self.n_misses += 1
+            return False
+
+    def process_trace(self, reader):
+        """Process entire trace."""
+        reader.reset()
+        n_hit = 0
+        n_total = 0
+        bytes_hit = 0
+        bytes_total = 0
+
+        for request in reader:
+            n_total += 1
+            bytes_total += request.obj_size
+
+            if self.get(request):
+                n_hit += 1
+                bytes_hit += request.obj_size
+
+        obj_miss_ratio = 1.0 - (n_hit / n_total) if n_total > 0 else 0.0
+        byte_miss_ratio = 1.0 - (bytes_hit / bytes_total) if bytes_total > 0 else 0.0
+
+        return obj_miss_ratio, byte_miss_ratio
+
+
+# Plugin cache hook functions
+def cache_init_hook(common_cache_params):
+    return StandaloneLRU(common_cache_params.cache_size)
+
+
+def cache_hit_hook(cache_data, request):
+    # This gets called on every cache hit
+    pass  # In real implementation, might update statistics
+
+
+def cache_miss_hook(cache_data, request):
+    # This gets called on every cache miss
+    pass  # In real implementation, might update statistics
+
+
+def cache_eviction_hook(cache_data, request):
+    # This gets called when eviction is needed
+    return cache_data.cache_eviction()
+
+
+def cache_remove_hook(cache_data, obj_id):
+    cache_data.cache_remove(obj_id)
+
+
+def cache_free_hook(cache_data):
+    cache_data.cache_data.clear()
+
+
+def create_plugin_cache(cache_size=1024):
+    """Create a plugin cache for testing."""
+    return MockPluginCache(
+        cache_size=cache_size,
+        cache_init_hook=cache_init_hook,
+        cache_hit_hook=cache_hit_hook,
+        cache_miss_hook=cache_miss_hook,
+        cache_eviction_hook=cache_eviction_hook,
+        cache_remove_hook=cache_remove_hook,
+        cache_free_hook=cache_free_hook,
+        cache_name="ProfiledPluginLRU"
+    )
+
+
+def simulate_slow_plugin_operations():
+    """Simulate slow plugin cache operations with artificial delays."""
+
+    class SlowStandaloneLRU(StandaloneLRU):
+        """LRU with artificial delays to simulate performance issues."""
+
+        def cache_hit(self, obj_id):
+            # Simulate slow hit processing
+            time.sleep(0.0001)  # 0.1ms delay
+            return super().cache_hit(obj_id)
+
+        def cache_miss(self, obj_id, obj_size):
+            # Simulate slow miss processing
+            time.sleep(0.0002)  # 0.2ms delay
+            super().cache_miss(obj_id, obj_size)
+
+        def cache_eviction(self):
+            # Simulate slow eviction
+            time.sleep(0.0003)  # 0.3ms delay
+            return super().cache_eviction()
+
+    def slow_cache_init_hook(common_cache_params):
+        return SlowStandaloneLRU(common_cache_params.cache_size)
+
+    def slow_cache_hit_hook(cache_data, request):
+        # Simulate slow Python callback
+        time.sleep(0.00005)  # 0.05ms delay
+
+    def slow_cache_miss_hook(cache_data, request):
+        # Simulate slow Python callback
+        time.sleep(0.00005)  # 0.05ms delay
+
+    return MockPluginCache(
+        cache_size=1024,
+        cache_init_hook=slow_cache_init_hook,
+        cache_hit_hook=slow_cache_hit_hook,
+        cache_miss_hook=slow_cache_miss_hook,
+        cache_eviction_hook=cache_eviction_hook,
+        cache_remove_hook=cache_remove_hook,
+        cache_free_hook=cache_free_hook,
+        cache_name="SlowPluginLRU"
+    )
+""" + +import cProfile +import pstats +import io +import time +import tracemalloc +import psutil +import os +import sys +from pathlib import Path +from typing import Dict, List, Tuple, Optional, Callable, Any, Union +from dataclasses import dataclass, field +from contextlib import contextmanager +import json +import csv + + +@dataclass +class ProfileResult: + """Container for profiling results.""" + method_name: str + execution_time: float + memory_peak: float + memory_current: float + cpu_percent: float + profile_stats: Optional[pstats.Stats] = None + custom_metrics: Dict[str, Any] = field(default_factory=dict) + + +class CacheSimProfiler: + """Comprehensive profiler for cache simulation operations.""" + + def __init__(self, output_dir: str = "profiling_results"): + """Initialize the profiler. + + Args: + output_dir: Directory to save profiling results + """ + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + self.results: List[ProfileResult] = [] + self.current_profile: Optional[cProfile.Profile] = None + self.start_time: float = 0 + self.process = psutil.Process(os.getpid()) + + def profile_function( + self, + func: Callable, + *args, + method_name: str = None, + **kwargs + ) -> Tuple[Any, ProfileResult]: + """Profile a single function call with comprehensive metrics. + + Args: + func: Function to profile + *args: Arguments for the function + method_name: Name for this profiling run + **kwargs: Keyword arguments for the function + + Returns: + Tuple of (function_result, ProfileResult) + """ + if method_name is None: + method_name = func.__name__ + + # Start memory tracking + tracemalloc.start() + + # Create profiler + profiler = cProfile.Profile() + + # Record initial state + memory_before = self.process.memory_info().rss / 1024 / 1024 # MB + start_time = time.perf_counter() + + # Run function with profiling + profiler.enable() + try: + result = func(*args, **kwargs) + finally: + profiler.disable() + + # Record final state + end_time = time.perf_counter() + memory_after = self.process.memory_info().rss / 1024 / 1024 # MB + + # Get memory peak from tracemalloc + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + + # Get CPU usage (approximate) + cpu_percent = self.process.cpu_percent() + + # Create stats object + stats_io = io.StringIO() + stats = pstats.Stats(profiler, stream=stats_io) + + # Create result + profile_result = ProfileResult( + method_name=method_name, + execution_time=end_time - start_time, + memory_peak=peak / 1024 / 1024, # Convert to MB + memory_current=memory_after - memory_before, + cpu_percent=cpu_percent, + profile_stats=stats + ) + + self.results.append(profile_result) + return result, profile_result + + @contextmanager + def profile_context(self, method_name: str): + """Context manager for profiling a block of code. 
diff --git a/examples/real_world_profiling.py b/examples/real_world_profiling.py
new file mode 100644
index 0000000..649b178
--- /dev/null
+++ b/examples/real_world_profiling.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+Real-world profiling example for libCacheSim plugin cache performance.
+
+This example demonstrates profiling with actual libcachesim components
+when the package is properly built and installed.
+"""
+
+try:
+    import libcachesim as lcs
+    from libcachesim.profiler import CacheSimProfiler
+    LIBCACHESIM_AVAILABLE = True
+except ImportError:
+    # Fallback for when libcachesim is not built
+    import sys
+    from pathlib import Path
+    sys.path.insert(0, str(Path(__file__).parent.parent / "libcachesim"))
+    from profiler import CacheSimProfiler
+    LIBCACHESIM_AVAILABLE = False
+    print("Note: Using standalone profiler without compiled libcachesim")
+
+
+def profile_with_real_libcachesim():
+    """Profile using real libcachesim components."""
+    profiler = CacheSimProfiler("real_profiling_results")
+
+    # Create a synthetic trace reader
+    reader = lcs.SyntheticReader(
+        num_of_req=10000,
+        num_objects=1000,
+        obj_size=100,
+        seed=42,
+        alpha=0.8,
+        dist="zipf"
+    )
+
+    # Compare different cache types
+    caches_to_test = [
+        ("LRU", lcs.LRU(cache_size=1024)),
+        ("S3FIFO", lcs.S3FIFO(cache_size=1024)),
+        ("Plugin LRU", create_plugin_lru_cache())
+    ]
+
+    for cache_name, cache in caches_to_test:
+        print(f"Profiling {cache_name}...")
+
+        # Profile trace processing
+        result = profiler.profile_trace_processing(
+            cache, reader, method_name=f"{cache_name}_processing"
+        )
+        print(f"  Time: {result.execution_time:.4f}s")
+        print(f"  Miss ratio: {result.custom_metrics.get('obj_miss_ratio', float('nan')):.4f}")
+
+    # Generate comparison report
+    comparison = profiler.compare_results()
+    print(f"\nFastest: {comparison['fastest_method']}")
+    print(f"Slowest: {comparison['slowest_method']}")
+    print(f"Performance range: {comparison['performance_ratio']:.2f}x")
+
+    # Save detailed results
+    profiler.generate_performance_report()
+    profiler.export_results_json()
+
+
+def create_plugin_lru_cache():
+    """Create a plugin LRU cache for comparison."""
+    from collections import OrderedDict
+
+    class PluginLRUData:
+        def __init__(self, cache_size):
+            self.cache_data = OrderedDict()
+            self.cache_size = cache_size
+
+    def cache_init_hook(common_params):
+        return PluginLRUData(common_params.cache_size)
+
+    def cache_hit_hook(data, request):
+        if request.obj_id in data.cache_data:
+            data.cache_data.move_to_end(request.obj_id)
+
+    def cache_miss_hook(data, request):
+        while len(data.cache_data) >= data.cache_size:
+            data.cache_data.popitem(last=False)
+        data.cache_data[request.obj_id] = request.obj_size
+
+    def cache_eviction_hook(data, request):
+        if data.cache_data:
+            return next(iter(data.cache_data))
+        return None
+
+    def cache_remove_hook(data, obj_id):
+        data.cache_data.pop(obj_id, None)
+
+    def cache_free_hook(data):
+        data.cache_data.clear()
+
+    return lcs.PluginCache(
+        cache_size=1024,
+        cache_init_hook=cache_init_hook,
+        cache_hit_hook=cache_hit_hook,
+        cache_miss_hook=cache_miss_hook,
+        cache_eviction_hook=cache_eviction_hook,
+        cache_remove_hook=cache_remove_hook,
+        cache_free_hook=cache_free_hook,
+        cache_name="PluginLRU"
+    )
+
+
+def profile_with_fallback():
+    """Fallback profiling when libcachesim is not available."""
+    print("Running fallback profiling example...")
+
+    from pathlib import Path
+
+    # Load the mock classes from profiling_analysis.py into an explicit
+    # namespace. A bare exec() inside a function would not make the names
+    # visible here, and the fake __name__ keeps that script's
+    # `if __name__ == "__main__"` block from running.
+    ns = {"__name__": "profiling_analysis_mocks"}
+    exec((Path(__file__).parent / "profiling_analysis.py").read_text(), ns)
+    MockReader = ns["MockReader"]
+    MockLRUCache = ns["MockLRUCache"]
+    create_plugin_cache = ns["create_plugin_cache"]
+
+    profiler = CacheSimProfiler("fallback_profiling_results")
+
+    # Create test components
+    reader = MockReader(num_requests=5000, num_objects=500)
+
+    caches = [
+        ("Native LRU", MockLRUCache(1024)),
+        ("Plugin LRU", create_plugin_cache(1024))
+    ]
+
+    for cache_name, cache in caches:
+        print(f"Profiling {cache_name}...")
+        result = profiler.profile_trace_processing(cache, reader, f"{cache_name}_test")
+        print(f"  Time: {result.execution_time:.4f}s")
+        print(f"  Miss ratio: {result.custom_metrics.get('obj_miss_ratio', float('nan')):.4f}")
+
+    profiler.generate_performance_report()
+
+
+def main():
+    """Main function to run the appropriate profiling example."""
+    print("libCacheSim Real-World Profiling Example")
+    print("=" * 50)
+
+    if LIBCACHESIM_AVAILABLE:
+        try:
+            profile_with_real_libcachesim()
+        except Exception as e:
+            print(f"Error with real libcachesim: {e}")
+            print("Falling back to mock profiling...")
+            profile_with_fallback()
+    else:
+        profile_with_fallback()
+
+    print("\nProfiling completed! Check the generated report files.")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/libcachesim/__init__.py b/libcachesim/__init__.py
index 38a6fa6..e3c27c7 100644
--- a/libcachesim/__init__.py
+++ b/libcachesim/__init__.py
@@ -60,6 +60,7 @@ from .trace_analyzer import TraceAnalyzer
 from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests
 from .util import Util
+from .profiler import CacheSimProfiler, ProfileResult, profile_cache_operations, profile_trace_processing
 
 __all__ = [
     # Core classes
@@ -119,6 +120,11 @@
     "create_uniform_requests",
     # Utilities
     "Util",
+    # Profiling
+    "CacheSimProfiler",
+    "ProfileResult",
+    "profile_cache_operations",
+    "profile_trace_processing",
     # Metadata
     "__doc__",
     "__version__",
diff --git a/libcachesim/profiler.py b/libcachesim/profiler.py
new file mode 100644
index 0000000..2213d1a
--- /dev/null
+++ b/libcachesim/profiler.py
@@ -0,0 +1,639 @@
+"""Profiling utilities for libCacheSim performance analysis.
+
+This module provides comprehensive profiling capabilities for analyzing the performance
+of cache simulations, particularly plugin caches and trace processing operations.
+"""
+
+import cProfile
+import pstats
+import io
+import time
+import tracemalloc
+import psutil
+import os
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Callable, Any
+from dataclasses import dataclass, field
+from contextlib import contextmanager
+import json
+import csv
+
+
+@dataclass
+class ProfileResult:
+    """Container for profiling results."""
+    method_name: str
+    execution_time: float
+    memory_peak: float
+    memory_current: float
+    cpu_percent: float
+    profile_stats: Optional[pstats.Stats] = None
+    custom_metrics: Dict[str, Any] = field(default_factory=dict)
+
+
+class CacheSimProfiler:
+    """Comprehensive profiler for cache simulation operations."""
+
+    def __init__(self, output_dir: str = "profiling_results"):
+        """Initialize the profiler.
+
+        Args:
+            output_dir: Directory to save profiling results
+        """
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(exist_ok=True)
+        self.results: List[ProfileResult] = []
+        self.current_profile: Optional[cProfile.Profile] = None
+        self.start_time: float = 0
+        self.process = psutil.Process(os.getpid())
+
+    def profile_function(
+        self,
+        func: Callable,
+        *args,
+        method_name: str = None,
+        **kwargs
+    ) -> Tuple[Any, ProfileResult]:
+        """Profile a single function call with comprehensive metrics.
+
+        Args:
+            func: Function to profile
+            *args: Arguments for the function
+            method_name: Name for this profiling run
+            **kwargs: Keyword arguments for the function
+
+        Returns:
+            Tuple of (function_result, ProfileResult)
+        """
+        if method_name is None:
+            method_name = func.__name__
+
+        # Start memory tracking
+        tracemalloc.start()
+
+        # Create profiler
+        profiler = cProfile.Profile()
+
+        # Record initial state
+        memory_before = self.process.memory_info().rss / 1024 / 1024  # MB
+        start_time = time.perf_counter()
+
+        # Run function with profiling
+        profiler.enable()
+        try:
+            result = func(*args, **kwargs)
+        finally:
+            profiler.disable()
+
+        # Record final state
+        end_time = time.perf_counter()
+        memory_after = self.process.memory_info().rss / 1024 / 1024  # MB
+
+        # Get memory peak from tracemalloc
+        current, peak = tracemalloc.get_traced_memory()
+        tracemalloc.stop()
+
+        # Get CPU usage (approximate)
+        cpu_percent = self.process.cpu_percent()
+
+        # Create stats object
+        stats_io = io.StringIO()
+        stats = pstats.Stats(profiler, stream=stats_io)
+
+        # Create result
+        profile_result = ProfileResult(
+            method_name=method_name,
+            execution_time=end_time - start_time,
+            memory_peak=peak / 1024 / 1024,  # Convert to MB
+            memory_current=memory_after - memory_before,
+            cpu_percent=cpu_percent,
+            profile_stats=stats
+        )
+
+        self.results.append(profile_result)
+        return result, profile_result
+
+    @contextmanager
+    def profile_context(self, method_name: str):
+        """Context manager for profiling a block of code.
+
+        Args:
+            method_name: Name for this profiling session
+
+        Yields:
+            ProfileResult that will be populated when the context exits
+        """
+        # Start memory tracking
+        tracemalloc.start()
+
+        # Create profiler
+        profiler = cProfile.Profile()
+
+        # Record initial state
+        memory_before = self.process.memory_info().rss / 1024 / 1024  # MB
+        start_time = time.perf_counter()
+
+        # Create result object (will be populated on exit)
+        profile_result = ProfileResult(
+            method_name=method_name,
+            execution_time=0,
+            memory_peak=0,
+            memory_current=0,
+            cpu_percent=0
+        )
+
+        profiler.enable()
+        try:
+            yield profile_result
+        finally:
+            # Record results in the finally block so they are captured
+            # even if the profiled code raises
+            profiler.disable()
+
+            # Record final state
+            end_time = time.perf_counter()
+            memory_after = self.process.memory_info().rss / 1024 / 1024  # MB
+
+            # Get memory peak from tracemalloc
+            current, peak = tracemalloc.get_traced_memory()
+            tracemalloc.stop()
+
+            # Get CPU usage
+            cpu_percent = self.process.cpu_percent()
+
+            # Create stats object
+            stats_io = io.StringIO()
+            stats = pstats.Stats(profiler, stream=stats_io)
+
+            # Update result
+            profile_result.execution_time = end_time - start_time
+            profile_result.memory_peak = peak / 1024 / 1024  # Convert to MB
+            profile_result.memory_current = memory_after - memory_before
+            profile_result.cpu_percent = cpu_percent
+            profile_result.profile_stats = stats
+
+            self.results.append(profile_result)
+
+    def profile_plugin_cache_operations(
+        self,
+        cache,
+        reader,
+        num_requests: int = 1000,
+        method_name: str = "plugin_cache_operations"
+    ) -> ProfileResult:
+        """Profile plugin cache operations with detailed function-level analysis.
+
+        Args:
+            cache: Cache object to profile
+            reader: Reader object for trace data
+            num_requests: Number of requests to process
+            method_name: Name for this profiling run
+
+        Returns:
+            ProfileResult with detailed profiling information
+        """
+        def run_cache_operations():
+            """Run cache operations for profiling."""
+            reader.reset()
+            n_hit = 0
+            n_miss = 0
+
+            for i, request in enumerate(reader):
+                if i >= num_requests:
+                    break
+
+                if cache.get(request):
+                    n_hit += 1
+                else:
+                    n_miss += 1
+
+            # Derive the total from the counters so an empty reader
+            # cannot leave the loop variable unbound
+            n_total = n_hit + n_miss
+            return {
+                'n_hit': n_hit,
+                'n_miss': n_miss,
+                'n_total': n_total,
+                'hit_ratio': n_hit / n_total if n_total > 0 else 0.0
+            }
+
+        result, profile_result = self.profile_function(run_cache_operations, method_name=method_name)
+        profile_result.custom_metrics = result
+        return profile_result
+
+    def profile_trace_processing(
+        self,
+        cache,
+        reader,
+        method_name: str = "trace_processing"
+    ) -> ProfileResult:
+        """Profile a complete trace processing operation.
+
+        Args:
+            cache: Cache object to use
+            reader: Reader object for trace data
+            method_name: Name for this profiling run
+
+        Returns:
+            ProfileResult with profiling information
+        """
+        def run_trace_processing():
+            """Run trace processing for profiling."""
+            # Try to use process_trace if available
+            if hasattr(cache, 'process_trace'):
+                return cache.process_trace(reader)
+            else:
+                # Fall back to manual processing
+                reader.reset()
+                n_hit = 0
+                n_total = 0
+                bytes_hit = 0
+                bytes_total = 0
+
+                for request in reader:
+                    n_total += 1
+                    bytes_total += getattr(request, 'obj_size', 1)
+
+                    if cache.get(request):
+                        n_hit += 1
+                        bytes_hit += getattr(request, 'obj_size', 1)
+
+                obj_miss_ratio = 1.0 - (n_hit / n_total) if n_total > 0 else 0.0
+                byte_miss_ratio = 1.0 - (bytes_hit / bytes_total) if bytes_total > 0 else 0.0
+
+                return obj_miss_ratio, byte_miss_ratio
+
+        result, profile_result = self.profile_function(run_trace_processing, method_name=method_name)
+        if isinstance(result, tuple) and len(result) == 2:
+            profile_result.custom_metrics = {
+                'obj_miss_ratio': result[0],
+                'byte_miss_ratio': result[1]
+            }
+        else:
+            profile_result.custom_metrics = {'result': str(result)}
+
+        return profile_result
+
+    def analyze_profile_stats(self, result: ProfileResult, top_n: int = 20) -> Dict[str, Any]:
+        """Analyze profile statistics and extract key insights.
+
+        Args:
+            result: ProfileResult to analyze
+            top_n: Number of top functions to analyze
+
+        Returns:
+            Dictionary with analysis results
+        """
+        if not result.profile_stats:
+            return {}
+
+        stats = result.profile_stats
+
+        # Capture the top functions by cumulative time. pstats writes to its
+        # .stream attribute, so point the stream at a StringIO before printing
+        # (redirecting sys.stdout would not capture anything).
+        stats.sort_stats('cumulative')
+        stats_io = io.StringIO()
+        stats.stream = stats_io
+        stats.print_stats(top_n)
+        stats_output = stats_io.getvalue()
+
+        # Get total function calls
+        total_calls = stats.total_calls
+
+        # Capture the top functions by total time
+        stats.sort_stats('tottime')
+        top_time_io = io.StringIO()
+        stats.stream = top_time_io
+        stats.print_stats(top_n)
+        top_time_output = top_time_io.getvalue()
+
+        # Extract function call information
+        function_stats = []
+        for func, (cc, nc, tt, ct, callers) in stats.stats.items():
+            function_stats.append({
+                'function': f"{func[0]}:{func[1]}({func[2]})",
+                'call_count': cc,
+                'total_time': tt,
+                'cumulative_time': ct,
+                'time_per_call': tt / cc if cc > 0 else 0
+            })
+
+        # Sort by total time and get top functions
+        function_stats.sort(key=lambda x: x['total_time'], reverse=True)
+        top_functions = function_stats[:top_n]
+
+        return {
+            'total_calls': total_calls,
+            'top_functions_by_time': top_functions,
+            'cumulative_stats_output': stats_output,
+            'total_time_stats_output': top_time_output,
+            'function_count': len(function_stats)
+        }
+
+    def save_profile_stats(self, result: ProfileResult, filename: str = None) -> str:
+        """Save detailed profile statistics to a file.
+
+        Args:
+            result: ProfileResult to save
+            filename: Optional filename, otherwise auto-generated
+
+        Returns:
+            Path to saved file
+        """
+        if filename is None:
+            timestamp = int(time.time())
+            filename = f"profile_{result.method_name}_{timestamp}.txt"
+
+        filepath = self.output_dir / filename
+
+        with open(filepath, 'w') as f:
+            f.write(f"Profile Results for: {result.method_name}\n")
+            f.write("=" * 60 + "\n")
+            f.write(f"Execution Time: {result.execution_time:.4f} seconds\n")
+            f.write(f"Memory Peak: {result.memory_peak:.2f} MB\n")
+            f.write(f"Memory Change: {result.memory_current:.2f} MB\n")
+            f.write(f"CPU Percent: {result.cpu_percent:.1f}%\n")
+            f.write("\nCustom Metrics:\n")
+            for key, value in result.custom_metrics.items():
+                f.write(f"  {key}: {value}\n")
+            f.write("\n" + "=" * 60 + "\n")
+
+            if result.profile_stats:
+                # Save cumulative time stats
+                f.write("\nTop Functions by Cumulative Time:\n")
+                f.write("-" * 40 + "\n")
+                result.profile_stats.sort_stats('cumulative')
+
+                # Capture stats output using StringIO
+                stats_io = io.StringIO()
+                result.profile_stats.stream = stats_io
+                result.profile_stats.print_stats(30)
+                f.write(stats_io.getvalue())
+
+                f.write("\n" + "-" * 40 + "\n")
+                f.write("Top Functions by Total Time:\n")
+                f.write("-" * 40 + "\n")
+
+                result.profile_stats.sort_stats('tottime')
+                stats_io = io.StringIO()
+                result.profile_stats.stream = stats_io
+                result.profile_stats.print_stats(30)
+                f.write(stats_io.getvalue())
+
+        return str(filepath)
+
+    def generate_performance_report(self, filename: str = None) -> str:
+        """Generate a comprehensive performance report for all profiling results.
+
+        Args:
+            filename: Optional filename, otherwise auto-generated
+
+        Returns:
+            Path to generated report
+        """
+        if filename is None:
+            timestamp = int(time.time())
+            filename = f"performance_report_{timestamp}.txt"
+
+        filepath = self.output_dir / filename
+
+        with open(filepath, 'w') as f:
+            f.write("libCacheSim Performance Analysis Report\n")
+            f.write("=" * 60 + "\n")
+            f.write(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
+            f.write(f"Total Profile Runs: {len(self.results)}\n\n")
+
+            # Summary table
+            f.write("Summary of All Profile Runs:\n")
+            f.write("-" * 60 + "\n")
+            f.write(f"{'Method':<25} {'Time (s)':<12} {'Memory (MB)':<12} {'CPU %':<8}\n")
+            f.write("-" * 60 + "\n")
+
+            total_time = 0
+            for result in self.results:
+                f.write(f"{result.method_name:<25} {result.execution_time:<12.4f} "
+                        f"{result.memory_peak:<12.2f} {result.cpu_percent:<8.1f}\n")
+                total_time += result.execution_time
+
+            f.write("-" * 60 + "\n")
+            f.write(f"{'TOTAL':<25} {total_time:<12.4f}\n")
+            f.write("\n")
+
+            # Detailed analysis for each run
+            for i, result in enumerate(self.results, 1):
+                f.write(f"\n{i}. Detailed Analysis: {result.method_name}\n")
+                f.write("=" * 50 + "\n")
+
+                f.write(f"Execution Time: {result.execution_time:.4f} seconds\n")
+                f.write(f"Memory Peak: {result.memory_peak:.2f} MB\n")
+                f.write(f"Memory Change: {result.memory_current:.2f} MB\n")
+                f.write(f"CPU Usage: {result.cpu_percent:.1f}%\n")
+
+                if result.custom_metrics:
+                    f.write("\nCustom Metrics:\n")
+                    for key, value in result.custom_metrics.items():
+                        f.write(f"  {key}: {value}\n")
+
+                # Add function analysis
+                analysis = self.analyze_profile_stats(result)
+                if analysis:
+                    f.write("\nFunction Call Analysis:\n")
+                    f.write(f"  Total Function Calls: {analysis.get('total_calls', 'N/A')}\n")
+                    f.write(f"  Unique Functions: {analysis.get('function_count', 'N/A')}\n")
+
+                    top_funcs = analysis.get('top_functions_by_time', [])[:5]
+                    if top_funcs:
+                        f.write("\n  Top 5 Functions by Time:\n")
+                        for func in top_funcs:
+                            f.write(f"    {func['function']}: {func['total_time']:.4f}s "
+                                    f"({func['call_count']} calls)\n")
+
+                f.write("\n")
+
+        return str(filepath)
+
+    def export_results_json(self, filename: str = None) -> str:
+        """Export all results to JSON format.
+
+        Args:
+            filename: Optional filename, otherwise auto-generated
+
+        Returns:
+            Path to exported JSON file
+        """
+        if filename is None:
+            timestamp = int(time.time())
+            filename = f"profile_results_{timestamp}.json"
+
+        filepath = self.output_dir / filename
+
+        # Convert results to a JSON-serializable format
+        json_results = []
+        for result in self.results:
+            json_result = {
+                'method_name': result.method_name,
+                'execution_time': result.execution_time,
+                'memory_peak': result.memory_peak,
+                'memory_current': result.memory_current,
+                'cpu_percent': result.cpu_percent,
+                'custom_metrics': result.custom_metrics
+            }
+
+            # Add function stats if available
+            if result.profile_stats:
+                analysis = self.analyze_profile_stats(result)
+                json_result['analysis'] = analysis
+
+            json_results.append(json_result)
+
+        with open(filepath, 'w') as f:
+            json.dump({
+                'generated_at': time.strftime('%Y-%m-%d %H:%M:%S'),
+                'total_runs': len(self.results),
+                'results': json_results
+            }, f, indent=2)
+
+        return str(filepath)
+
+    def export_results_csv(self, filename: str = None) -> str:
+        """Export a results summary to CSV format.
+
+        Args:
+            filename: Optional filename, otherwise auto-generated
+
+        Returns:
+            Path to exported CSV file
+        """
+        if filename is None:
+            timestamp = int(time.time())
+            filename = f"profile_summary_{timestamp}.csv"
+
+        filepath = self.output_dir / filename
+
+        with open(filepath, 'w', newline='') as f:
+            writer = csv.writer(f)
+
+            # Write header
+            header = ['method_name', 'execution_time', 'memory_peak_mb', 'memory_change_mb', 'cpu_percent']
+
+            # Add custom metrics columns
+            all_metrics = set()
+            for result in self.results:
+                all_metrics.update(result.custom_metrics.keys())
+
+            header.extend(sorted(all_metrics))
+            writer.writerow(header)
+
+            # Write data rows
+            for result in self.results:
+                row = [
+                    result.method_name,
+                    result.execution_time,
+                    result.memory_peak,
+                    result.memory_current,
+                    result.cpu_percent
+                ]
+
+                # Add custom metrics values
+                for metric in sorted(all_metrics):
+                    row.append(result.custom_metrics.get(metric, ''))
+
+                writer.writerow(row)
+
+        return str(filepath)
+
+    def compare_results(self, method_names: List[str] = None) -> Dict[str, Any]:
+        """Compare performance across different profiled methods.
+
+        Args:
+            method_names: Optional list of method names to compare. If None, compares all.
+
+        Returns:
+            Dictionary with comparison analysis
+        """
+        if method_names is None:
+            results_to_compare = self.results
+        else:
+            results_to_compare = [r for r in self.results if r.method_name in method_names]
+
+        if len(results_to_compare) < 2:
+            return {'error': 'Need at least 2 results to compare'}
+
+        # Find best and worst performers
+        fastest = min(results_to_compare, key=lambda r: r.execution_time)
+        slowest = max(results_to_compare, key=lambda r: r.execution_time)
+
+        lowest_memory = min(results_to_compare, key=lambda r: r.memory_peak)
+        highest_memory = max(results_to_compare, key=lambda r: r.memory_peak)
+
+        # Calculate relative performance
+        comparisons = []
+        for result in results_to_compare:
+            relative_time = result.execution_time / fastest.execution_time
+            relative_memory = result.memory_peak / lowest_memory.memory_peak if lowest_memory.memory_peak > 0 else 1
+
+            comparisons.append({
+                'method_name': result.method_name,
+                'execution_time': result.execution_time,
+                'relative_time': relative_time,
+                'memory_peak': result.memory_peak,
+                'relative_memory': relative_memory,
+                'speedup_vs_slowest': slowest.execution_time / result.execution_time
+            })
+
+        return {
+            'fastest_method': fastest.method_name,
+            'slowest_method': slowest.method_name,
+            'lowest_memory_method': lowest_memory.method_name,
+            'highest_memory_method': highest_memory.method_name,
+            'performance_ratio': slowest.execution_time / fastest.execution_time,
+            'memory_ratio': highest_memory.memory_peak / lowest_memory.memory_peak if lowest_memory.memory_peak > 0 else 1,
+            'detailed_comparisons': comparisons
+        }
+
+    def clear_results(self):
+        """Clear all stored profiling results."""
+        self.results.clear()
+
+
+# Convenience function for quick profiling
+def profile_cache_operations(cache, reader, num_requests: int = 1000, output_dir: str = "profiling_results") -> ProfileResult:
+    """Quick profiling of cache operations.
+
+    Args:
+        cache: Cache object to profile
+        reader: Reader object for trace data
+        num_requests: Number of requests to process
+        output_dir: Directory to save results
+
+    Returns:
+        ProfileResult with profiling information
+    """
+    profiler = CacheSimProfiler(output_dir)
+    result = profiler.profile_plugin_cache_operations(cache, reader, num_requests)
+
+    # Save detailed results
+    profiler.save_profile_stats(result)
+    profiler.generate_performance_report()
+
+    return result
+
+
+def profile_trace_processing(cache, reader, output_dir: str = "profiling_results") -> ProfileResult:
+    """Quick profiling of trace processing.
+
+    Args:
+        cache: Cache object to use
+        reader: Reader object for trace data
+        output_dir: Directory to save results
+
+    Returns:
+        ProfileResult with profiling information
+    """
+    profiler = CacheSimProfiler(output_dir)
+    result = profiler.profile_trace_processing(cache, reader)
+
+    # Save detailed results
+    profiler.save_profile_stats(result)
+    profiler.generate_performance_report()
+
+    return result
\ No newline at end of file
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
new file mode 100644
index 0000000..46d5e8c
--- /dev/null
+++ b/tests/test_profiler.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Simple test for profiler functionality without compiled libcachesim package.
+"""
+
+import sys
+from pathlib import Path
+
+# Add libcachesim to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "libcachesim"))
+
+from profiler import CacheSimProfiler
+
+
+def test_basic_profiling():
+    """Test basic profiling functionality."""
+    profiler = CacheSimProfiler("test_output")
+
+    # Profile a simple function
+    def slow_function():
+        """Simple function that takes some time."""
+        result = 0
+        for i in range(100000):
+            result += i * i
+        return result
+
+    # Test function profiling
+    result, profile_result = profiler.profile_function(slow_function, method_name="slow_math")
+
+    print(f"Function result: {result}")
+    print(f"Execution time: {profile_result.execution_time:.4f}s")
+    print(f"Memory peak: {profile_result.memory_peak:.2f}MB")
+
+    # Test context manager
+    with profiler.profile_context("context_test") as profile_result:
+        data = [i**2 for i in range(10000)]
+        profile_result.custom_metrics['list_length'] = len(data)
+
+    print(f"Context execution time: {profile_result.execution_time:.4f}s")
+    print(f"List length: {profile_result.custom_metrics['list_length']}")
+
+    # Generate reports
+    report = profiler.generate_performance_report("test_report.txt")
+    json_file = profiler.export_results_json("test_results.json")
+
+    print(f"Report saved: {report}")
+    print(f"JSON saved: {json_file}")
+
+    print("Basic profiling test passed!")
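+
+
+# A hedged extra check (not part of the original example run): pytest collects
+# it automatically; the __main__ block below still only runs the basic test.
+def test_compare_results():
+    """Compare two profiled runs; assumes the keys returned by compare_results()."""
+    profiler = CacheSimProfiler("test_output")
+
+    # Two runs with a ~10x difference in work so the ordering is unambiguous
+    profiler.profile_function(lambda: sum(i * i for i in range(50_000)), method_name="fast")
+    profiler.profile_function(lambda: sum(i * i for i in range(500_000)), method_name="slow")
+
+    comparison = profiler.compare_results()
+    assert comparison["fastest_method"] == "fast"
+    assert comparison["performance_ratio"] >= 1.0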
+""" + +import sys +from pathlib import Path + +# Add libcachesim to path +sys.path.insert(0, str(Path(__file__).parent.parent / "libcachesim")) + +from profiler import CacheSimProfiler + + +def test_basic_profiling(): + """Test basic profiling functionality.""" + profiler = CacheSimProfiler("test_output") + + # Profile a simple function + def slow_function(): + """Simple function that takes some time.""" + result = 0 + for i in range(100000): + result += i * i + return result + + # Test function profiling + result, profile_result = profiler.profile_function(slow_function, method_name="slow_math") + + print(f"Function result: {result}") + print(f"Execution time: {profile_result.execution_time:.4f}s") + print(f"Memory peak: {profile_result.memory_peak:.2f}MB") + + # Test context manager + with profiler.profile_context("context_test") as profile_result: + data = [i**2 for i in range(10000)] + profile_result.custom_metrics['list_length'] = len(data) + + print(f"Context execution time: {profile_result.execution_time:.4f}s") + print(f"List length: {profile_result.custom_metrics['list_length']}") + + # Generate reports + report = profiler.generate_performance_report("test_report.txt") + json_file = profiler.export_results_json("test_results.json") + + print(f"Report saved: {report}") + print(f"JSON saved: {json_file}") + + print("Basic profiling test passed!") + + +if __name__ == "__main__": + test_basic_profiling() \ No newline at end of file