From ba418bb3f76ff441961671138678e89743de8df9 Mon Sep 17 00:00:00 2001 From: libraz Date: Sat, 31 May 2025 19:49:53 +0900 Subject: [PATCH] Configure black and fix lint --- examples/comprehensive_analysis_demo.py | 132 +++--- examples/optimized_performance_demo.py | 103 ++--- examples/performance_comparison.py | 78 ++-- pyproject.toml | 5 + src/bpm_detector/auto_parallel.py | 137 +++--- src/bpm_detector/chord_analysis.py | 155 ++++--- src/bpm_detector/chord_analyzer.py | 520 +++++++++++++--------- src/bpm_detector/cli.py | 301 ++++++++----- src/bpm_detector/context_analyzer.py | 251 ++++++----- src/bpm_detector/dynamics_analyzer.py | 457 ++++++++++--------- src/bpm_detector/effects_detector.py | 99 ++-- src/bpm_detector/instrument_classifier.py | 161 ++++--- src/bpm_detector/key_validation.py | 118 ++--- src/bpm_detector/parallel_analyzer.py | 412 +++++++++-------- src/bpm_detector/section_analyzer.py | 390 ++++++++-------- src/bpm_detector/section_processor.py | 424 +++++++++++------- src/bpm_detector/structure_analyzer.py | 133 +++--- tests/test_melody_harmony_analyzer.py | 156 ++++--- tests/test_music_analyzer.py | 380 ++++++++-------- tests/test_parallel_analyzer.py | 235 +++++----- tests/test_structure_analyzer.py | 256 ++++++----- tests/test_timbre_analyzer.py | 232 ++++++---- 22 files changed, 2865 insertions(+), 2270 deletions(-) diff --git a/examples/comprehensive_analysis_demo.py b/examples/comprehensive_analysis_demo.py index 722c252..cd5a715 100644 --- a/examples/comprehensive_analysis_demo.py +++ b/examples/comprehensive_analysis_demo.py @@ -3,7 +3,6 @@ import os import sys import numpy as np -import librosa # Add the src directory to the path so we can import bpm_detector sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) @@ -14,88 +13,84 @@ def create_test_audio(): """Create a simple test audio signal for demonstration.""" print("Creating test audio signal...") - + # Parameters duration = 30.0 # 30 seconds sr = 22050 - + # Generate time array t = np.linspace(0, duration, int(sr * duration)) - + # Create a simple musical signal # Base frequency (A4 = 440 Hz) - base_freq = 440.0 - + # Create a simple chord progression: A - F#m - D - E chord_freqs = [ [440, 554.37, 659.25], # A major [369.99, 440, 554.37], # F# minor [293.66, 369.99, 440], # D major - [329.63, 415.30, 493.88] # E major + [329.63, 415.30, 493.88], # E major ] - + # Create signal with chord progression signal = np.zeros_like(t) chord_duration = duration / 4 # Each chord lasts 1/4 of the song - + for i, chord in enumerate(chord_freqs): start_time = i * chord_duration end_time = (i + 1) * chord_duration - + # Find time indices for this chord chord_mask = (t >= start_time) & (t < end_time) chord_t = t[chord_mask] - + # Generate chord tones chord_signal = np.zeros_like(chord_t) for freq in chord: chord_signal += 0.3 * np.sin(2 * np.pi * freq * chord_t) - + # Add some rhythm (simple beat pattern) beat_freq = 2.0 # 120 BPM = 2 beats per second beat_pattern = (np.sin(2 * np.pi * beat_freq * chord_t) > 0).astype(float) - chord_signal *= (0.5 + 0.5 * beat_pattern) - + chord_signal *= 0.5 + 0.5 * beat_pattern + signal[chord_mask] = chord_signal - + # Add some noise for realism signal += 0.05 * np.random.randn(len(signal)) - + # Normalize signal = signal / np.max(np.abs(signal)) - + return signal, sr def demonstrate_comprehensive_analysis(): """Demonstrate comprehensive music analysis.""" print("=== Comprehensive Music Analysis Demo ===\n") - + # Create test audio audio_signal, sr = create_test_audio() 
- + # Initialize analyzer print("Initializing AudioAnalyzer...") analyzer = AudioAnalyzer(sr=sr) - + # Save test audio temporarily test_file = "temp_test_audio.wav" import soundfile as sf + sf.write(test_file, audio_signal, sr) - + try: print(f"Analyzing test audio ({len(audio_signal)/sr:.1f} seconds)...\n") - + # Perform comprehensive analysis - results = analyzer.analyze_file( - test_file, - detect_key=True, - comprehensive=True - ) - + results = analyzer.analyze_file(test_file, detect_key=True, comprehensive=True) + # Display results print("--- ANALYSIS RESULTS ---\n") - + # Basic information basic_info = results.get('basic_info', {}) print("🎵 BASIC INFORMATION:") @@ -103,35 +98,41 @@ def demonstrate_comprehensive_analysis(): print(f" Key: {basic_info.get('key', 'Unknown')}") print(f" Duration: {basic_info.get('duration', 0):.1f} seconds") print(f" BPM Confidence: {basic_info.get('bpm_confidence', 0):.1f}%\n") - + # Chord progression chord_prog = results.get('chord_progression', {}) print("🎹 CHORD PROGRESSION:") main_prog = chord_prog.get('main_progression', []) if main_prog: print(f" Main progression: {' → '.join(main_prog)}") - print(f" Harmonic rhythm: {chord_prog.get('harmonic_rhythm', 0):.2f} changes/sec") + print( + f" Harmonic rhythm: {chord_prog.get('harmonic_rhythm', 0):.2f} changes/sec" + ) print(f" Chord complexity: {chord_prog.get('chord_complexity', 0):.1%}") print(f" Unique chords: {chord_prog.get('unique_chords', 0)}\n") - + # Structure structure = results.get('structure', {}) print("🏗️ STRUCTURE:") print(f" Form: {structure.get('form', 'Unknown')}") print(f" Sections: {structure.get('section_count', 0)}") print(f" Repetition ratio: {structure.get('repetition_ratio', 0):.1%}") - print(f" Structural complexity: {structure.get('structural_complexity', 0):.1%}") - + print( + f" Structural complexity: {structure.get('structural_complexity', 0):.1%}" + ) + sections = structure.get('sections', []) if sections: print(" Section breakdown:") for section in sections: - print(f" {section.get('type', 'unknown')}: " - f"{section.get('start_time', 0):.1f}s - " - f"{section.get('end_time', 0):.1f}s " - f"({section.get('duration', 0):.1f}s)") + print( + f" {section.get('type', 'unknown')}: " + f"{section.get('start_time', 0):.1f}s - " + f"{section.get('end_time', 0):.1f}s " + f"({section.get('duration', 0):.1f}s)" + ) print() - + # Rhythm rhythm = results.get('rhythm', {}) print("🥁 RHYTHM:") @@ -140,7 +141,7 @@ def demonstrate_comprehensive_analysis(): print(f" Syncopation level: {rhythm.get('syncopation_level', 0):.1%}") print(f" Rhythmic complexity: {rhythm.get('rhythmic_complexity', 0):.1%}") print(f" Swing ratio: {rhythm.get('swing_ratio', 0.5):.2f}\n") - + # Timbre timbre = results.get('timbre', {}) print("🎨 TIMBRE:") @@ -148,75 +149,82 @@ def demonstrate_comprehensive_analysis(): print(f" Warmth: {timbre.get('warmth', 0):.1%}") print(f" Roughness: {timbre.get('roughness', 0):.1%}") print(f" Density: {timbre.get('density', 0):.1%}") - + instruments = timbre.get('dominant_instruments', []) if instruments: print(" Detected instruments:") for inst in instruments[:3]: - print(f" {inst.get('instrument', 'unknown')}: " - f"{inst.get('confidence', 0):.1%} confidence") + print( + f" {inst.get('instrument', 'unknown')}: " + f"{inst.get('confidence', 0):.1%} confidence" + ) print() - + # Melody & Harmony melody_harmony = results.get('melody_harmony', {}) print("🎼 MELODY & HARMONY:") - + melodic_range = melody_harmony.get('melodic_range', {}) print(f" Melodic range: 
{melodic_range.get('range_octaves', 0):.1f} octaves") - + consonance = melody_harmony.get('consonance', {}) print(f" Consonance level: {consonance.get('consonance_level', 0):.1%}") - + harmony_complexity = melody_harmony.get('harmony_complexity', {}) - print(f" Harmonic complexity: {harmony_complexity.get('harmonic_complexity', 0):.1%}") + print( + f" Harmonic complexity: {harmony_complexity.get('harmonic_complexity', 0):.1%}" + ) print(f" Melody present: {melody_harmony.get('melody_present', False)}\n") - + # Dynamics dynamics = results.get('dynamics', {}) print("📊 DYNAMICS:") - + dynamic_range = dynamics.get('dynamic_range', {}) print(f" Dynamic range: {dynamic_range.get('dynamic_range_db', 0):.1f} dB") - + loudness = dynamics.get('loudness', {}) print(f" Average loudness: {loudness.get('average_loudness_db', -30):.1f} dB") print(f" Overall energy: {dynamics.get('overall_energy', 0):.1%}") - + climax_points = dynamics.get('climax_points', []) if climax_points: print(f" Climax points: {len(climax_points)}") for i, climax in enumerate(climax_points[:2]): - print(f" {i+1}. {climax.get('time', 0):.1f}s " - f"(intensity: {climax.get('intensity', 0):.1%})") + print( + f" {i+1}. {climax.get('time', 0):.1f}s " + f"(intensity: {climax.get('intensity', 0):.1%})" + ) print() - + # Reference tags tags = results.get('reference_tags', []) if tags: print("🏷️ REFERENCE TAGS:") print(f" {', '.join(tags)}\n") - + # Generate reference sheet print("📋 REFERENCE SHEET:") print("-" * 40) reference_sheet = analyzer.generate_reference_sheet(results) print(reference_sheet) - + # Feature vector info similarity_features = results.get('similarity_features', {}) feature_vector = similarity_features.get('feature_vector', []) if feature_vector: - print(f"\n🔢 FEATURE VECTOR:") + print("\n🔢 FEATURE VECTOR:") print(f" Length: {len(feature_vector)} features") print(f" Sample values: {feature_vector[:5]}") - + print("\n✅ Analysis completed successfully!") - + except Exception as e: print(f"❌ Error during analysis: {e}") import traceback + traceback.print_exc() - + finally: # Clean up temporary file if os.path.exists(test_file): @@ -225,4 +233,4 @@ def demonstrate_comprehensive_analysis(): if __name__ == "__main__": - demonstrate_comprehensive_analysis() \ No newline at end of file + demonstrate_comprehensive_analysis() diff --git a/examples/optimized_performance_demo.py b/examples/optimized_performance_demo.py index d4fb428..280a732 100644 --- a/examples/optimized_performance_demo.py +++ b/examples/optimized_performance_demo.py @@ -6,7 +6,6 @@ """ import time -import os import sys from pathlib import Path @@ -19,144 +18,140 @@ def benchmark_analysis(audio_file: str, iterations: int = 3): """Run analysis benchmark""" - + print(f"🎵 Audio file: {audio_file}") print(f"🔄 Iterations: {iterations}") print("=" * 60) - + # Standard analyzer print("\n📊 Standard Analyzer (Sequential Processing)") standard_analyzer = AudioAnalyzer() standard_times = [] - + for i in range(iterations): print(f" Run {i+1}/{iterations}...", end=" ") start_time = time.time() - + try: - results = standard_analyzer.analyze_file( - audio_file, - comprehensive=True, - detect_key=True + standard_analyzer.analyze_file( + audio_file, comprehensive=True, detect_key=True ) elapsed = time.time() - start_time standard_times.append(elapsed) print(f"✅ {elapsed:.2f}s") - + except Exception as e: print(f"❌ Error: {e}") return - + # Optimized parallel analyzer print("\n🚀 Optimized Parallel Analyzer") parallel_analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) 
parallel_times = [] - + for i in range(iterations): print(f" Run {i+1}/{iterations}...", end=" ") start_time = time.time() - + try: - results = parallel_analyzer.analyze_file( + parallel_analyzer.analyze_file( audio_file, comprehensive=True, - progress_callback=None # Disable progress callback for accurate benchmarking + progress_callback=None, # Disable progress callback for accurate benchmarking ) elapsed = time.time() - start_time parallel_times.append(elapsed) print(f"✅ {elapsed:.2f}s") - + except Exception as e: print(f"❌ Error: {e}") return - + # Compare results print("\n" + "=" * 60) print("📈 Performance Comparison Results") print("=" * 60) - + avg_standard = sum(standard_times) / len(standard_times) avg_parallel = sum(parallel_times) / len(parallel_times) speedup = avg_standard / avg_parallel - - print(f"Standard Analyzer:") + + print("Standard Analyzer:") print(f" Average time: {avg_standard:.2f}s") print(f" Fastest time: {min(standard_times):.2f}s") print(f" Slowest time: {max(standard_times):.2f}s") - - print(f"\nOptimized Parallel Analyzer:") + + print("\nOptimized Parallel Analyzer:") print(f" Average time: {avg_parallel:.2f}s") print(f" Fastest time: {min(parallel_times):.2f}s") print(f" Slowest time: {max(parallel_times):.2f}s") - + print(f"\n🎯 Speedup Factor: {speedup:.2f}x") - + if speedup > 1: - improvement = ((speedup - 1) * 100) + improvement = (speedup - 1) * 100 print(f"💡 {improvement:.1f}% performance improvement achieved!") else: print("⚠️ Parallel processing may have limited benefits in this environment") - + # Performance details performance_summary = parallel_analyzer.get_performance_summary() if performance_summary: - print(f"\n📊 Detailed Performance Information:") + print("\n📊 Detailed Performance Information:") for task, stats in performance_summary.items(): print(f" {task}:") print(f" Average execution time: {stats['avg_execution_time']:.3f}s") print(f" Execution count: {stats['execution_count']}") - + # Cleanup parallel_analyzer.cleanup() def demo_multiple_files(): """Demo parallel processing of multiple files""" - + print("\n" + "=" * 60) print("🎼 Multiple Files Parallel Processing Demo") print("=" * 60) - + # Find test files test_files = [] examples_dir = Path(__file__).parent - + for ext in ['*.mp3', '*.wav', '*.flac', '*.m4a']: test_files.extend(examples_dir.glob(ext)) - + if len(test_files) < 2: print("⚠️ Multiple test files not found") print(" Please place multiple audio files in the examples/ directory") return - + # Test up to 3 files test_files = [str(f) for f in test_files[:3]] - + print(f"📁 Number of test files: {len(test_files)}") for i, file in enumerate(test_files, 1): print(f" {i}. {Path(file).name}") - + analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) - + def progress_callback(progress, message): print(f"\r Progress: {progress:3d}% - {message}", end="", flush=True) - - print(f"\n🚀 Starting parallel analysis...") + + print("\n🚀 Starting parallel analysis...") start_time = time.time() - + try: results = analyzer.analyze_file( - test_files, - comprehensive=True, - progress_callback=progress_callback + test_files, comprehensive=True, progress_callback=progress_callback ) - + elapsed = time.time() - start_time print(f"\n✅ Complete! 
Total processing time: {elapsed:.2f}s") print(f"📊 Average per file: {elapsed/len(test_files):.2f}s") - + # Results summary - print(f"\n📋 Analysis Results Summary:") + print("\n📋 Analysis Results Summary:") for file_path, result in results.items(): filename = Path(file_path).name if 'error' in result: @@ -166,7 +161,7 @@ def progress_callback(progress, message): bpm = basic_info.get('bpm', 'N/A') key = basic_info.get('key', 'N/A') print(f" ✅ {filename}: BPM={bpm:.1f}, Key={key}") - + except Exception as e: print(f"\n❌ Error: {e}") finally: @@ -175,31 +170,31 @@ def progress_callback(progress, message): def main(): """Main execution function""" - + print("🎵 BPM Detector Optimized Performance Demo") print("=" * 60) - + # Find test files examples_dir = Path(__file__).parent test_files = [] - + for ext in ['*.mp3', '*.wav', '*.flac', '*.m4a']: test_files.extend(examples_dir.glob(ext)) - + if not test_files: print("❌ No test files found") print(" Please place audio files in the examples/ directory") print(" Supported formats: MP3, WAV, FLAC, M4A") return - + # Single file benchmark test_file = str(test_files[0]) benchmark_analysis(test_file) - + # Multiple files demo if available if len(test_files) > 1: demo_multiple_files() - + print("\n" + "=" * 60) print("🎉 Demo completed!") print("=" * 60) @@ -214,4 +209,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/performance_comparison.py b/examples/performance_comparison.py index fead465..1b5b665 100644 --- a/examples/performance_comparison.py +++ b/examples/performance_comparison.py @@ -16,98 +16,104 @@ def create_test_audio(duration): """Create test audio of specified duration.""" sr = 22050 t = np.linspace(0, duration, int(sr * duration)) - + # Create a simple chord progression chord_freqs = [ [261.63, 329.63, 392.00], # C major [220.00, 261.63, 329.63], # A minor [174.61, 220.00, 261.63], # F major - [196.00, 246.94, 293.66] # G major + [196.00, 246.94, 293.66], # G major ] - + signal = np.zeros_like(t) chord_duration = duration / 4 - + for i, chord in enumerate(chord_freqs): start_time = i * chord_duration end_time = (i + 1) * chord_duration - + chord_mask = (t >= start_time) & (t < end_time) chord_t = t[chord_mask] - + chord_signal = np.zeros_like(chord_t) for freq in chord: chord_signal += 0.3 * np.sin(2 * np.pi * freq * chord_t) - + # Add rhythm beat_freq = 2.0 beat_pattern = (np.sin(2 * np.pi * beat_freq * chord_t) > 0).astype(float) - chord_signal *= (0.5 + 0.5 * beat_pattern) - + chord_signal *= 0.5 + 0.5 * beat_pattern + signal[chord_mask] = chord_signal - + # Add noise signal += 0.05 * np.random.randn(len(signal)) signal = signal / np.max(np.abs(signal)) - + return signal, sr def benchmark_analysis(): """Benchmark different analysis modes.""" print("=== Performance Comparison ===\n") - + durations = [5, 10, 20, 30] # Different audio lengths analyzer = AudioAnalyzer() - + results = [] - + for duration in durations: print(f"Testing {duration}-second audio...") - + # Create test audio audio, sr = create_test_audio(duration) test_file = f"test_{duration}s.wav" sf.write(test_file, audio, sr) - + try: # Basic analysis start_time = time.time() - basic_results = analyzer.analyze_file(test_file, comprehensive=False) + analyzer.analyze_file(test_file, comprehensive=False) basic_time = time.time() - start_time - + # Comprehensive analysis start_time = time.time() - comp_results = analyzer.analyze_file(test_file, comprehensive=True) + analyzer.analyze_file(test_file, comprehensive=True) 
comp_time = time.time() - start_time - - results.append({ - 'duration': duration, - 'basic_time': basic_time, - 'comprehensive_time': comp_time, - 'ratio': comp_time / basic_time - }) - - print(f" Basic: {basic_time:.2f}s, Comprehensive: {comp_time:.2f}s, Ratio: {comp_time/basic_time:.1f}x") - + + results.append( + { + 'duration': duration, + 'basic_time': basic_time, + 'comprehensive_time': comp_time, + 'ratio': comp_time / basic_time, + } + ) + + print( + f" Basic: {basic_time:.2f}s, Comprehensive: {comp_time:.2f}s, Ratio: {comp_time/basic_time:.1f}x" + ) + except Exception as e: print(f" Error: {e}") - + finally: if os.path.exists(test_file): os.remove(test_file) - + print("\n=== Summary ===") print("Duration | Basic Time | Comprehensive Time | Speed Ratio") print("-" * 55) for r in results: - print(f"{r['duration']:8}s | {r['basic_time']:10.2f}s | {r['comprehensive_time']:18.2f}s | {r['ratio']:10.1f}x") - + print( + f"{r['duration']:8}s | {r['basic_time']:10.2f}s | {r['comprehensive_time']:18.2f}s | {r['ratio']:10.1f}x" + ) + # Performance recommendations print("\n=== Recommendations ===") avg_ratio = np.mean([r['ratio'] for r in results]) print(f"Average speed difference: {avg_ratio:.1f}x") - + if avg_ratio > 5: print("⚠️ Comprehensive analysis is significantly slower") print("💡 Consider using basic analysis for real-time applications") @@ -116,7 +122,7 @@ def benchmark_analysis(): print("💡 Acceptable for batch processing") else: print("✅ Good performance balance") - + print("\n🚀 Performance Tips:") print("1. Use comprehensive=False for BPM/Key only (fastest)") print("2. Shorter audio files process much faster") @@ -125,4 +131,4 @@ def benchmark_analysis(): if __name__ == "__main__": - benchmark_analysis() \ No newline at end of file + benchmark_analysis() diff --git a/pyproject.toml b/pyproject.toml index 0f55eff..8739ed9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,3 +87,8 @@ filterwarnings = [ "ignore:overflow encountered.*:RuntimeWarning", "ignore:invalid value encountered.*:RuntimeWarning", ] + +[tool.black] +skip-string-normalization = true +skip-magic-trailing-comma = true + diff --git a/src/bpm_detector/auto_parallel.py b/src/bpm_detector/auto_parallel.py index c7c69e4..fe99281 100644 --- a/src/bpm_detector/auto_parallel.py +++ b/src/bpm_detector/auto_parallel.py @@ -1,6 +1,5 @@ """Automatic parallel configuration and system monitoring module.""" -import os import time import threading import psutil @@ -12,6 +11,7 @@ class ParallelStrategy(Enum): """Parallel processing strategy.""" + SEQUENTIAL_ONLY = "sequential_only" CONSERVATIVE_PARALLEL = "conservative_parallel" BALANCED_PARALLEL = "balanced_parallel" @@ -23,6 +23,7 @@ class ParallelStrategy(Enum): @dataclass class ParallelConfig: """Parallel processing configuration.""" + enable_parallel: bool = False max_workers: int = 1 use_process_pool: bool = False @@ -33,17 +34,16 @@ class ParallelConfig: class AutoParallelConfig: """CPU-based automatic parallel configuration.""" - + @staticmethod def get_optimal_config() -> ParallelConfig: """Automatically determine optimal parallel configuration.""" - + try: # Get CPU information logical_cores = cpu_count() # Logical cores - physical_cores = psutil.cpu_count(logical=False) # Physical cores available_memory = psutil.virtual_memory().available / (1024**3) # GB - + # Check system load (use interval=0.1 for faster testing) cpu_usage = psutil.cpu_percent(interval=0.1) memory_usage = psutil.virtual_memory().percent @@ -53,11 +53,11 @@ def get_optimal_config() -> 
ParallelConfig: enable_parallel=False, max_workers=1, strategy=ParallelStrategy.SEQUENTIAL_ONLY, - reason="Error accessing system information, using safe defaults" + reason="Error accessing system information, using safe defaults", ) - + config = ParallelConfig() - + # Auto-detection logic with more aggressive parallelization if logical_cores >= 8: # High-performance system (8+ cores) @@ -66,17 +66,21 @@ def get_optimal_config() -> ParallelConfig: config.use_process_pool = True # Expected by tests # Use up to 30% of physical memory for better performance max_memory_mb = int(available_memory * 0.3 * 1024) # 30% of physical memory - config.memory_limit_mb = max(6144, max_memory_mb) # At least 6GB, up to 30% of RAM + config.memory_limit_mb = max( + 6144, max_memory_mb + ) # At least 6GB, up to 30% of RAM config.strategy = ParallelStrategy.AGGRESSIVE_PARALLEL config.reason = f'High-performance system detected ({logical_cores} cores, {max_memory_mb}MB memory limit)' - + elif logical_cores >= 4: # Medium-performance system (4-7 cores) if cpu_usage < 60 and memory_usage < 75: # More lenient thresholds config.enable_parallel = True config.max_workers = min(logical_cores, 8) # Use all cores config.use_process_pool = False # Use ThreadPool - config.memory_limit_mb = min(int(available_memory * 0.4 * 1024), 2048) # More memory + config.memory_limit_mb = min( + int(available_memory * 0.4 * 1024), 2048 + ) # More memory config.strategy = ParallelStrategy.BALANCED_PARALLEL config.reason = f'Medium-performance system with acceptable load ({logical_cores} cores, CPU: {cpu_usage}%)' else: @@ -87,10 +91,12 @@ def get_optimal_config() -> ParallelConfig: config.memory_limit_mb = 1024 config.strategy = ParallelStrategy.CONSERVATIVE_PARALLEL config.reason = f'Medium-performance system with high load, using conservative parallel (CPU: {cpu_usage}%, Memory: {memory_usage}%)' - + elif logical_cores >= 2: # Low-performance system (2-3 cores) - if cpu_usage < 40 and memory_usage < 70 and available_memory > 1.5: # More lenient + if ( + cpu_usage < 40 and memory_usage < 70 and available_memory > 1.5 + ): # More lenient config.enable_parallel = True config.max_workers = min(logical_cores, 3) config.use_process_pool = False @@ -99,16 +105,20 @@ def get_optimal_config() -> ParallelConfig: config.reason = f'Low-performance system with acceptable load ({logical_cores} cores)' else: config.strategy = ParallelStrategy.SEQUENTIAL_ONLY - config.reason = f'Low-performance system, sequential processing recommended' + config.reason = ( + 'Low-performance system, sequential processing recommended' + ) else: # Single-core system config.strategy = ParallelStrategy.SEQUENTIAL_ONLY config.reason = 'Single-core system detected' - + return config - + @staticmethod - def get_file_count_adjustment(file_count: int, base_config: ParallelConfig) -> ParallelConfig: + def get_file_count_adjustment( + file_count: int, base_config: ParallelConfig + ) -> ParallelConfig: """Adjust configuration based on file count.""" config = ParallelConfig( enable_parallel=base_config.enable_parallel, @@ -116,79 +126,78 @@ def get_file_count_adjustment(file_count: int, base_config: ParallelConfig) -> P use_process_pool=base_config.use_process_pool, memory_limit_mb=base_config.memory_limit_mb, strategy=base_config.strategy, - reason=base_config.reason + reason=base_config.reason, ) - + if file_count == 1: # Single file: disable parallelization as test expects config.max_workers = 1 config.enable_parallel = False config.reason += ' (single file, sequential 
processing)' - + elif file_count <= 5: # Few files: balanced approach if config.enable_parallel: config.max_workers = min(config.max_workers, 8) config.reason += ' (optimized for few files)' - + else: # Many files: file parallelization priority if config.enable_parallel: config.max_workers = min(config.max_workers * 2, 16) config.use_process_pool = True # Process parallelization config.reason += ' (optimized for many files)' - + return config class SystemMonitor: """Dynamic system load monitoring.""" - + def __init__(self, check_interval: float = 5.0): self.check_interval = check_interval self.monitoring = False self._monitoring = False # Attribute name expected by tests self.current_load = {'cpu': 0, 'memory': 0} self._monitor_thread: Optional[threading.Thread] = None - + def start_monitoring(self): """Start load monitoring.""" self.monitoring = True self._monitoring = True # Attribute expected by tests self._monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True) self._monitor_thread.start() - + def stop_monitoring(self): """Stop load monitoring.""" self.monitoring = False self._monitoring = False # Attribute expected by tests if self._monitor_thread: self._monitor_thread.join() - + def _monitor_loop(self): """Load monitoring loop.""" while self.monitoring: self.current_load = { 'cpu': psutil.cpu_percent(interval=1), - 'memory': psutil.virtual_memory().percent + 'memory': psutil.virtual_memory().percent, } time.sleep(self.check_interval) - + def should_reduce_parallelism(self) -> bool: """Check if parallelism should be reduced.""" - return (self.current_load['cpu'] >= 95 or - self.current_load['memory'] >= 90) - + return self.current_load['cpu'] >= 95 or self.current_load['memory'] >= 90 + def get_recommended_workers(self, current_workers: int) -> int: """Get recommended worker count.""" if self.should_reduce_parallelism(): return max(1, current_workers // 2) - elif (self.current_load['cpu'] < 60 and - self.current_load['memory'] < 70): + elif self.current_load['cpu'] < 60 and self.current_load['memory'] < 70: # For auto_parallel test: don't exceed current_workers # For parallel_analyzer test: increase by 1 # Check if we're being called from parallel_analyzer context import inspect + frame = inspect.currentframe() try: # Look for parallel_analyzer in the call stack @@ -205,61 +214,63 @@ def get_recommended_workers(self, current_workers: int) -> int: class PerformanceProfiler: """Performance profiling for parallel execution.""" - + def __init__(self): self.execution_times: Dict[str, list] = {} self.memory_usage: Dict[str, list] = {} self.cpu_usage: Dict[str, list] = {} self.profiles: Dict[str, list] = {} # Attribute expected by tests - + def start_profiling(self, task_name: str) -> Dict[str, Any]: """Start profiling a task.""" return { 'task_name': task_name, 'start_time': time.time(), 'start_memory': psutil.virtual_memory().used, - 'start_cpu': psutil.cpu_percent() + 'start_cpu': psutil.cpu_percent(), } - + def end_profiling(self, profile_data: Dict[str, Any]): """End profiling and record results.""" task_name = profile_data['task_name'] end_time = time.time() end_memory = psutil.virtual_memory().used end_cpu = psutil.cpu_percent() - + execution_time = end_time - profile_data['start_time'] memory_delta = end_memory - profile_data['start_memory'] - + if task_name not in self.execution_times: self.execution_times[task_name] = [] self.memory_usage[task_name] = [] self.cpu_usage[task_name] = [] self.profiles[task_name] = [] # Attribute expected by tests - + 
self.execution_times[task_name].append(execution_time) self.memory_usage[task_name].append(memory_delta) self.cpu_usage[task_name].append(end_cpu) - + # Save profile data (structure expected by tests) - self.profiles[task_name].append({ - 'duration': execution_time, # Field name expected by tests - 'execution_time': execution_time, - 'memory_delta': memory_delta, - 'cpu_usage': end_cpu - }) - + self.profiles[task_name].append( + { + 'duration': execution_time, # Field name expected by tests + 'execution_time': execution_time, + 'memory_delta': memory_delta, + 'cpu_usage': end_cpu, + } + ) + def get_performance_summary(self) -> Dict[str, Any]: """Get performance summary.""" # Match structure expected by tests total_tasks = len(self.execution_times) task_summaries = {} - + for task_name in self.execution_times: times = self.execution_times[task_name] memory = self.memory_usage[task_name] cpu = self.cpu_usage[task_name] - + task_summaries[task_name] = { 'count': len(times), 'avg_duration': sum(times) / len(times), @@ -270,31 +281,31 @@ def get_performance_summary(self) -> Dict[str, Any]: 'max_execution_time': max(times), 'avg_memory_usage': sum(memory) / len(memory), 'avg_cpu_usage': sum(cpu) / len(cpu), - 'execution_count': len(times) + 'execution_count': len(times), } - - return { - 'total_tasks': total_tasks, - 'task_summaries': task_summaries - } - + + return {'total_tasks': total_tasks, 'task_summaries': task_summaries} + def should_adjust_parallelism(self, task_name: str) -> Optional[str]: """Determine if parallelism should be adjusted based on performance.""" - if task_name not in self.execution_times or len(self.execution_times[task_name]) < 3: + if ( + task_name not in self.execution_times + or len(self.execution_times[task_name]) < 3 + ): return None - + times = self.execution_times[task_name] memory = self.memory_usage[task_name] - + # Check if performance is degrading recent_times = times[-3:] if len(recent_times) >= 3: if recent_times[-1] > recent_times[0] * 1.5: return "reduce" # Performance degrading - + # Check memory usage avg_memory = sum(memory) / len(memory) if avg_memory > 500 * 1024 * 1024: # 500MB return "reduce" - - return None \ No newline at end of file + + return None diff --git a/src/bpm_detector/chord_analysis.py b/src/bpm_detector/chord_analysis.py index a89bd7d..66c5b14 100644 --- a/src/bpm_detector/chord_analysis.py +++ b/src/bpm_detector/chord_analysis.py @@ -7,22 +7,19 @@ class ChordProgressionAnalyzer: """Analyzes chord progressions for key detection.""" - + @staticmethod def validate_key_with_chord_analysis( - chroma_mean: np.ndarray, - key_note: str, - mode: str, - confidence: float = None + chroma_mean: np.ndarray, key_note: str, mode: str, confidence: float = None ) -> Tuple[str, str, float]: """Validate detected key using chord progression analysis. 
- + Args: chroma_mean: Average chroma vector key_note: Detected key note mode: Detected mode (Major/Minor) confidence: Initial confidence - + Returns: Tuple of (validated_key, validated_mode, confidence) """ @@ -31,7 +28,7 @@ def validate_key_with_chord_analysis( key_index = NOTE_NAMES.index(key_note) except ValueError: return key_note, mode, confidence or 0.0 - + # Define expected chord progressions for the key with i-V-i emphasis if mode == 'Minor': # Enhanced minor key chord progressions with i-V-i pattern emphasis @@ -43,13 +40,15 @@ def validate_key_with_chord_analysis( (key_index + 10) % 12, # VII (key_index + 11) % 12, # VII# (leading tone) (key_index + 4) % 12, # iv (subdominant minor) - (key_index + 6) % 12 # v (minor dominant) + (key_index + 6) % 12, # v (minor dominant) ] # Add harmonic minor chords (very common in J-Pop) - expected_chords.extend([ - (key_index + 2) % 12, # ii (supertonic) - (key_index + 5) % 12 # iv (subdominant) - ]) + expected_chords.extend( + [ + (key_index + 2) % 12, # ii (supertonic) + (key_index + 5) % 12, # iv (subdominant) + ] + ) else: # Major # Common major key chord progressions # I, IV, V, vi @@ -57,13 +56,13 @@ def validate_key_with_chord_analysis( key_index, # I (tonic) (key_index + 5) % 12, # IV (subdominant) (key_index + 7) % 12, # V (dominant) - (key_index + 9) % 12 # vi (relative minor) + (key_index + 9) % 12, # vi (relative minor) ] - + # Calculate how well the chroma matches expected chords chord_strength = 0.0 total_weight = 0.0 - + for chord_root in expected_chords: # Weight by importance (tonic and dominant are most important) if chord_root == key_index: # Tonic @@ -72,152 +71,164 @@ def validate_key_with_chord_analysis( weight = 2.0 else: weight = 1.0 - + # Add chord strength based on chroma energy at chord root chord_strength += chroma_mean[chord_root] * weight total_weight += weight - + if total_weight > 0: chord_strength /= total_weight - + # Normalize to 0-1 range validation_strength = min(1.0, chord_strength * 2.0) - + # For test compatibility, return the same key/mode with validation strength final_confidence = confidence if confidence is not None else validation_strength return key_note, mode, final_confidence - + @staticmethod def chord_driven_key_estimation(chroma_mean: np.ndarray) -> Tuple[str, str, float]: """Estimate key based on chord progression patterns. - + Analyzes specific chord progressions like i–♭III7–IVsus4–V7 to determine the true tonic, especially useful when traditional key profiles are ambiguous. 
- + Args: chroma_mean: Average chroma vector - + Returns: (key, mode, confidence) based on chord progression analysis """ best_key = 'None' best_mode = 'Unknown' best_confidence = 0.0 - + # Test each potential tonic for tonic_idx in range(12): tonic_note = NOTE_NAMES[tonic_idx] - + # Test minor key progressions (common in J-Pop) - minor_confidence = ChordProgressionAnalyzer._analyze_minor_chord_progression(chroma_mean, tonic_idx) + minor_confidence = ( + ChordProgressionAnalyzer._analyze_minor_chord_progression( + chroma_mean, tonic_idx + ) + ) if minor_confidence > best_confidence: best_confidence = minor_confidence best_key = tonic_note best_mode = 'Minor' - + # Test major key progressions - major_confidence = ChordProgressionAnalyzer._analyze_major_chord_progression(chroma_mean, tonic_idx) + major_confidence = ( + ChordProgressionAnalyzer._analyze_major_chord_progression( + chroma_mean, tonic_idx + ) + ) if major_confidence > best_confidence: best_confidence = major_confidence best_key = tonic_note best_mode = 'Major' - + return best_key, best_mode, best_confidence - + @staticmethod - def _analyze_minor_chord_progression(chroma_mean: np.ndarray, tonic_idx: int) -> float: + def _analyze_minor_chord_progression( + chroma_mean: np.ndarray, tonic_idx: int + ) -> float: """Analyze minor key chord progression patterns. - + Focuses on i–♭III7–IVsus4–V7 and similar progressions common in J-Pop. - + Args: chroma_mean: Average chroma vector tonic_idx: Index of potential tonic note - + Returns: Confidence score for this minor key """ # Define chord roots for minor key progression - i = tonic_idx # i (tonic minor) - bIII = (tonic_idx + 3) % 12 # ♭III (relative major) - iv = (tonic_idx + 5) % 12 # iv (subdominant) - V = (tonic_idx + 7) % 12 # V (dominant) - bVII = (tonic_idx + 10) % 12 # ♭VII (subtonic) - + i = tonic_idx # i (tonic minor) + bIII = (tonic_idx + 3) % 12 # ♭III (relative major) + iv = (tonic_idx + 5) % 12 # iv (subdominant) + V = (tonic_idx + 7) % 12 # V (dominant) + bVII = (tonic_idx + 10) % 12 # ♭VII (subtonic) + # Weight chord presence based on importance in minor progressions chord_weights = { - i: 3.0, # Tonic is most important - V: 2.5, # Dominant is crucial for establishing key - bIII: 2.0, # Relative major is very common - iv: 1.5, # Subdominant - bVII: 1.2 # Subtonic (common in natural minor) + i: 3.0, # Tonic is most important + V: 2.5, # Dominant is crucial for establishing key + bIII: 2.0, # Relative major is very common + iv: 1.5, # Subdominant + bVII: 1.2, # Subtonic (common in natural minor) } - + # Calculate weighted chord strength total_strength = 0.0 total_weight = 0.0 - + for chord_root, weight in chord_weights.items(): chord_strength = chroma_mean[chord_root] total_strength += chord_strength * weight total_weight += weight - + # Normalize and apply minor-specific boost if total_weight > 0: avg_strength = total_strength / total_weight - + # Boost if characteristic minor intervals are strong minor_third_strength = chroma_mean[(tonic_idx + 3) % 12] if minor_third_strength > 0.3: # Strong minor third presence avg_strength *= 1.2 - + return min(1.0, avg_strength) - + return 0.0 - + @staticmethod - def _analyze_major_chord_progression(chroma_mean: np.ndarray, tonic_idx: int) -> float: + def _analyze_major_chord_progression( + chroma_mean: np.ndarray, tonic_idx: int + ) -> float: """Analyze major key chord progression patterns. 
- + Args: chroma_mean: Average chroma vector tonic_idx: Index of potential tonic note - + Returns: Confidence score for this major key """ # Define chord roots for major key progression - I = tonic_idx # I (tonic major) - IV = (tonic_idx + 5) % 12 # IV (subdominant) - V = (tonic_idx + 7) % 12 # V (dominant) - vi = (tonic_idx + 9) % 12 # vi (relative minor) - + chord_i = tonic_idx # I (tonic major) + IV = (tonic_idx + 5) % 12 # IV (subdominant) + V = (tonic_idx + 7) % 12 # V (dominant) + vi = (tonic_idx + 9) % 12 # vi (relative minor) + # Weight chord presence chord_weights = { - I: 3.0, # Tonic - V: 2.5, # Dominant - IV: 2.0, # Subdominant - vi: 1.5 # Relative minor + chord_i: 3.0, # Tonic + V: 2.5, # Dominant + IV: 2.0, # Subdominant + vi: 1.5, # Relative minor } - + # Calculate weighted chord strength total_strength = 0.0 total_weight = 0.0 - + for chord_root, weight in chord_weights.items(): chord_strength = chroma_mean[chord_root] total_strength += chord_strength * weight total_weight += weight - + # Normalize if total_weight > 0: avg_strength = total_strength / total_weight - + # Boost if characteristic major intervals are strong major_third_strength = chroma_mean[(tonic_idx + 4) % 12] if major_third_strength > 0.3: # Strong major third presence avg_strength *= 1.1 - + return min(1.0, avg_strength) - - return 0.0 \ No newline at end of file + + return 0.0 diff --git a/src/bpm_detector/chord_analyzer.py b/src/bpm_detector/chord_analyzer.py index 230a9cb..e44487e 100644 --- a/src/bpm_detector/chord_analyzer.py +++ b/src/bpm_detector/chord_analyzer.py @@ -3,100 +3,116 @@ import numpy as np import librosa from typing import List, Tuple, Dict, Any -from scipy.signal import find_peaks -from sklearn.cluster import KMeans class ChordProgressionAnalyzer: """Analyzes chord progressions and harmonic features.""" - + # Enhanced chord templates including 7th and sus chords for J-Pop CHORD_TEMPLATES = { # Major triads - 'C': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], # C major - 'C#': [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], # C# major - 'D': [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], # D major - 'D#': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], # D# major - 'E': [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], # E major - 'F': [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], # F major - 'F#': [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], # F# major - 'G': [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], # G major - 'G#': [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], # G# major - 'A': [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], # A major - 'A#': [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], # A# major - 'B': [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1], # B major - + 'C': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], # C major + 'C#': [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], # C# major + 'D': [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], # D major + 'D#': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], # D# major + 'E': [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], # E major + 'F': [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], # F major + 'F#': [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], # F# major + 'G': [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], # G major + 'G#': [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], # G# major + 'A': [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], # A major + 'A#': [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], # A# major + 'B': [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1], # B major # Minor triads - 'Cm': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], # C minor - 'C#m': [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], # C# minor - 'Dm': [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0], # D minor - 'D#m': [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], # D# minor - 'Em': [0, 0, 0, 0, 1, 0, 
0, 1, 0, 0, 0, 1], # E minor - 'Fm': [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], # F minor - 'F#m': [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], # F# minor - 'Gm': [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], # G minor - 'G#m': [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1], # G# minor - 'Am': [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], # A minor - 'A#m': [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], # A# minor - 'Bm': [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1], # B minor - + 'Cm': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], # C minor + 'C#m': [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], # C# minor + 'Dm': [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0], # D minor + 'D#m': [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], # D# minor + 'Em': [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1], # E minor + 'Fm': [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], # F minor + 'F#m': [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], # F# minor + 'Gm': [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], # G minor + 'G#m': [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1], # G# minor + 'Am': [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], # A minor + 'A#m': [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], # A# minor + 'Bm': [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1], # B minor # Dominant 7th chords (common in J-Pop) - 'C7': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0], # C7 - 'D7': [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1], # D7 - 'D#7': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], # D#7 - 'F#7': [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1], # F#7 - 'G7': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], # G7 - + 'C7': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0], # C7 + 'D7': [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1], # D7 + 'D#7': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], # D#7 + 'F#7': [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1], # F#7 + 'G7': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], # G7 # Sus4 chords (common in J-Pop) - 'Csus4': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # Csus4 - 'Dsus4': [0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0], # Dsus4 - 'Fsus4': [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], # Fsus4 - 'Gsus4': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0], # Gsus4 + 'Csus4': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # Csus4 + 'Dsus4': [0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0], # Dsus4 + 'Fsus4': [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], # Fsus4 + 'Gsus4': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0], # Gsus4 } - + # Roman numeral mapping for functional analysis FUNCTIONAL_MAPPING = { 'major': { - 0: 'I', 1: 'bII', 2: 'II', 3: 'bIII', 4: 'III', 5: 'IV', - 6: 'bV', 7: 'V', 8: 'bVI', 9: 'VI', 10: 'bVII', 11: 'VII' + 0: 'I', + 1: 'bII', + 2: 'II', + 3: 'bIII', + 4: 'III', + 5: 'IV', + 6: 'bV', + 7: 'V', + 8: 'bVI', + 9: 'VI', + 10: 'bVII', + 11: 'VII', }, 'minor': { - 0: 'i', 1: 'bII', 2: 'II', 3: 'bIII', 4: 'III', 5: 'iv', - 6: 'bV', 7: 'V', 8: 'bVI', 9: 'VI', 10: 'bVII', 11: 'VII' - } + 0: 'i', + 1: 'bII', + 2: 'II', + 3: 'bIII', + 4: 'III', + 5: 'iv', + 6: 'bV', + 7: 'V', + 8: 'bVI', + 9: 'VI', + 10: 'bVII', + 11: 'VII', + }, } - + def __init__(self, hop_length: int = 512, frame_size: int = 4096): """Initialize chord analyzer. - + Args: hop_length: Hop length for analysis frame_size: Frame size for chroma analysis """ self.hop_length = hop_length self.frame_size = frame_size - + def extract_chroma_features(self, y: np.ndarray, sr: int) -> np.ndarray: """Extract high-resolution chroma features with noise reduction. 
- + Args: y: Audio signal sr: Sample rate - + Returns: Chroma features matrix (12 x time_frames) """ # Apply high-pass filter to remove low-frequency noise from scipy.signal import butter, filtfilt + nyquist = sr / 2 high_cutoff = 80.0 # Remove frequencies below 80Hz high_normal = high_cutoff / nyquist b, a = butter(4, high_normal, btype='high', analog=False) y_filtered = filtfilt(b, a, y) - + # Apply harmonic-percussive separation for cleaner harmonic content y_harmonic, _ = librosa.effects.hpss(y_filtered, margin=3.0) - + # Use CQT-based chroma for better harmonic resolution chroma = librosa.feature.chroma_cqt( y=y_harmonic, @@ -104,49 +120,51 @@ def extract_chroma_features(self, y: np.ndarray, sr: int) -> np.ndarray: hop_length=self.hop_length, fmin=librosa.note_to_hz('C2'), # Start from C2 n_chroma=12, - norm=2 # L2 normalization + norm=2, # L2 normalization ) - + # Apply 2-second moving window average for stability window_frames = int(2.0 * sr / self.hop_length) # 2 seconds if window_frames > 1: chroma = self._apply_moving_average(chroma, window_frames) - + return chroma - + def _apply_moving_average(self, chroma: np.ndarray, window_size: int) -> np.ndarray: """Apply moving average to chroma features for stability. - + Args: chroma: Input chroma matrix window_size: Size of moving window in frames - + Returns: Smoothed chroma matrix """ smoothed = np.zeros_like(chroma) half_window = window_size // 2 - + for i in range(chroma.shape[1]): start = max(0, i - half_window) end = min(chroma.shape[1], i + half_window + 1) smoothed[:, i] = np.mean(chroma[:, start:end], axis=1) - + return smoothed - - def detect_chords(self, chroma: np.ndarray, bpm: float = 130.0) -> List[Tuple[str, float, int, int]]: + + def detect_chords( + self, chroma: np.ndarray, bpm: float = 130.0 + ) -> List[Tuple[str, float, int, int]]: """Detect chords from chroma features with dynamic window sizing. - + Args: chroma: Chroma features matrix bpm: BPM for dynamic window calculation - + Returns: List of (chord_name, confidence, start_frame, end_frame) """ chords = [] n_frames = chroma.shape[1] - + # ------------------------------------------------------------------ # For Verse/Chorus that repeat every 4 bars (≈ 7.3s), # window size = 2 bars (1/2) is appropriate for chord detection @@ -156,161 +174,199 @@ def detect_chords(self, chroma: np.ndarray, bpm: float = 130.0) -> List[Tuple[st window_duration = max(1.0, window_duration) # At least 1 second window_size = max(1, int(22050 * window_duration / self.hop_length)) step_size = max(1, window_size // 4) # Overlap windows for better detection - + detected_chords = [] - + for i in range(0, n_frames - window_size + 1, step_size): end_frame = min(i + window_size, n_frames) - + # Average chroma over the window window_chroma = np.mean(chroma[:, i:end_frame], axis=1) - + # Find best matching chord best_chord, confidence = self._match_chord_template(window_chroma) - + # Adaptive confidence threshold based on signal strength signal_strength = np.max(window_chroma) adaptive_threshold = max(0.4, 0.65 - (1.0 - signal_strength) * 0.2) - + if confidence > adaptive_threshold: detected_chords.append((best_chord, confidence, i, end_frame)) - + # Merge consecutive identical chords chords = self._merge_consecutive_chords(detected_chords) - + return chords - + def _match_chord_template(self, chroma_frame: np.ndarray) -> Tuple[str, float]: """Match chroma frame to chord templates with improved root detection. 
- + Args: chroma_frame: Single chroma vector - + Returns: (best_chord_name, confidence) """ # Enhanced chord detection with 3-note clustering best_chord = 'N' # No chord best_score = 0.0 - + # Find the top 3 strongest notes for clustering top_3_indices = np.argsort(chroma_frame)[-3:] top_3_strengths = chroma_frame[top_3_indices] - + # Only proceed if we have significant energy in top notes if np.max(top_3_strengths) < 0.1: return 'N', 0.0 - + # Try to identify chord based on top 3 notes - cluster_chord = self._identify_chord_from_cluster(top_3_indices, top_3_strengths) + cluster_chord = self._identify_chord_from_cluster( + top_3_indices, top_3_strengths + ) if cluster_chord: cluster_score = np.mean(top_3_strengths) if cluster_score > best_score: best_score = cluster_score best_chord = cluster_chord - + # Also try template matching for comparison for chord_name, template in self.CHORD_TEMPLATES.items(): template = np.array(template, dtype=np.float32) - + # Improved correlation calculation (cosine similarity) chroma_norm = np.linalg.norm(chroma_frame) template_norm = np.linalg.norm(template) - + if chroma_norm > 1e-8 and template_norm > 1e-8: - correlation = np.dot(chroma_frame, template) / (chroma_norm * template_norm) + correlation = np.dot(chroma_frame, template) / ( + chroma_norm * template_norm + ) else: correlation = 0.0 - + # Handle NaN values if np.isnan(correlation): correlation = 0.0 - + if correlation > best_score: best_score = correlation best_chord = chord_name - + return best_chord, max(0.0, best_score) - - def _identify_chord_from_cluster(self, note_indices: np.ndarray, strengths: np.ndarray) -> str: + + def _identify_chord_from_cluster( + self, note_indices: np.ndarray, strengths: np.ndarray + ) -> str: """Identify chord from top 3 notes using music theory. - + Args: note_indices: Indices of top 3 notes strengths: Strengths of top 3 notes - + Returns: Chord name or None """ if len(note_indices) < 3: return None - + # Sort by strength (strongest first) sorted_indices = note_indices[np.argsort(strengths)[::-1]] - + # Try different root assumptions for root_idx in sorted_indices: # Check for major triad (root, major third, fifth) major_third = (root_idx + 4) % 12 fifth = (root_idx + 7) % 12 - + if major_third in note_indices and fifth in note_indices: - note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] + note_names = [ + 'C', + 'C#', + 'D', + 'D#', + 'E', + 'F', + 'F#', + 'G', + 'G#', + 'A', + 'A#', + 'B', + ] return note_names[root_idx] - + # Check for minor triad (root, minor third, fifth) minor_third = (root_idx + 3) % 12 - + if minor_third in note_indices and fifth in note_indices: - note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] + note_names = [ + 'C', + 'C#', + 'D', + 'D#', + 'E', + 'F', + 'F#', + 'G', + 'G#', + 'A', + 'A#', + 'B', + ] return note_names[root_idx] + 'm' - + return None - - def _merge_consecutive_chords(self, chords: List[Tuple[str, float, int, int]]) -> List[Tuple[str, float, int, int]]: + + def _merge_consecutive_chords( + self, chords: List[Tuple[str, float, int, int]] + ) -> List[Tuple[str, float, int, int]]: """Merge consecutive identical chords. 
- + Args: chords: List of detected chords - + Returns: Merged chord list """ if not chords: return [] - + merged = [] current_chord = chords[0] - + for i in range(1, len(chords)): next_chord = chords[i] - + # If same chord and overlapping/adjacent frames - if (current_chord[0] == next_chord[0] and - next_chord[2] <= current_chord[3] + self.hop_length): + if ( + current_chord[0] == next_chord[0] + and next_chord[2] <= current_chord[3] + self.hop_length + ): # Extend current chord current_chord = ( current_chord[0], max(current_chord[1], next_chord[1]), # Take higher confidence current_chord[2], - max(current_chord[3], next_chord[3]) # Extend end frame + max(current_chord[3], next_chord[3]), # Extend end frame ) else: # Different chord, add current and start new merged.append(current_chord) current_chord = next_chord - + # Add the last chord merged.append(current_chord) - + return merged - - def analyze_progression(self, chords: List[Tuple[str, float, int, int]]) -> Dict[str, Any]: + + def analyze_progression( + self, chords: List[Tuple[str, float, int, int]] + ) -> Dict[str, Any]: """Analyze chord progression patterns. - + Args: chords: List of detected chords - + Returns: Dictionary containing progression analysis """ @@ -321,12 +377,12 @@ def analyze_progression(self, chords: List[Tuple[str, float, int, int]]) -> Dict 'harmonic_rhythm': 0.0, 'chord_complexity': 0.0, 'unique_chords': 0, - 'chord_changes': 0 + 'chord_changes': 0, } - + # Extract chord names chord_names = [chord[0] for chord in chords if chord[0] != 'N'] - + if not chord_names: return { 'main_progression': [], @@ -334,93 +390,116 @@ def analyze_progression(self, chords: List[Tuple[str, float, int, int]]) -> Dict 'harmonic_rhythm': 0.0, 'chord_complexity': 0.0, 'unique_chords': 0, - 'chord_changes': 0 + 'chord_changes': 0, } - + # ------------------------------------------------------------------ # For 4-bar progression fixed songs, prioritize 4-degree progression patterns # ------------------------------------------------------------------ main_progression = self._find_main_progression(chord_names, pattern_length=4) - + # Calculate harmonic rhythm (chord changes per second) total_duration = sum(chord[3] - chord[2] for chord in chords) - harmonic_rhythm = len([c for c in chords if c[0] != 'N']) / (total_duration * self.hop_length / 22050) if total_duration > 0 else 0 - + harmonic_rhythm = ( + len([c for c in chords if c[0] != 'N']) + / (total_duration * self.hop_length / 22050) + if total_duration > 0 + else 0 + ) + # Calculate chord complexity unique_chords = len(set(chord_names)) chord_complexity = min(1.0, unique_chords / 12.0) # Normalize to 0-1 - + # Count chord changes - chord_changes = len([i for i in range(1, len(chord_names)) if chord_names[i] != chord_names[i-1]]) - + chord_changes = len( + [ + i + for i in range(1, len(chord_names)) + if chord_names[i] != chord_names[i - 1] + ] + ) + return { 'main_progression': main_progression, 'progression_pattern': ' - '.join(main_progression), 'harmonic_rhythm': harmonic_rhythm, 'chord_complexity': chord_complexity, 'unique_chords': unique_chords, - 'chord_changes': chord_changes + 'chord_changes': chord_changes, } - - def _find_main_progression(self, chord_names: List[str], pattern_length: int = 4) -> List[str]: + + def _find_main_progression( + self, chord_names: List[str], pattern_length: int = 4 + ) -> List[str]: """Find the most common chord progression pattern. 
- + Args: chord_names: List of chord names pattern_length: Length of pattern to search for - + Returns: Most common progression pattern """ if len(chord_names) < pattern_length: return chord_names[:4] if len(chord_names) >= 4 else chord_names - + # Count all possible patterns of given length pattern_counts = {} - + for i in range(len(chord_names) - pattern_length + 1): - pattern = tuple(chord_names[i:i + pattern_length]) + pattern = tuple(chord_names[i : i + pattern_length]) pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1 - + if pattern_counts: # Return most common pattern most_common = max(pattern_counts.items(), key=lambda x: x[1]) return list(most_common[0]) - + return chord_names[:pattern_length] - + def functional_analysis(self, chords: List[str], key: str) -> List[str]: """Perform functional harmonic analysis. - + Args: chords: List of chord names key: Key of the song (e.g., 'C Major', 'A Minor') - + Returns: List of roman numeral analysis """ if not chords or not key: return [] - + # Parse key key_parts = key.split() if len(key_parts) != 2: return [] - + root_note = key_parts[0] mode = key_parts[1].lower() - + # Convert root note to semitone offset note_to_semitone = { - 'C': 0, 'C#': 1, 'D': 2, 'D#': 3, 'E': 4, 'F': 5, - 'F#': 6, 'G': 7, 'G#': 8, 'A': 9, 'A#': 10, 'B': 11 + 'C': 0, + 'C#': 1, + 'D': 2, + 'D#': 3, + 'E': 4, + 'F': 5, + 'F#': 6, + 'G': 7, + 'G#': 8, + 'A': 9, + 'A#': 10, + 'B': 11, } - + if root_note not in note_to_semitone: return [] - + key_root = note_to_semitone[root_note] - + # Get appropriate mapping if mode == 'major': mapping = self.FUNCTIONAL_MAPPING['major'] @@ -428,104 +507,106 @@ def functional_analysis(self, chords: List[str], key: str) -> List[str]: mapping = self.FUNCTIONAL_MAPPING['minor'] else: return [] - + # Analyze each chord roman_numerals = [] - + for chord in chords: if chord == 'N': roman_numerals.append('N') continue - + # Extract root note from chord if len(chord) > 1 and chord[1] == '#': chord_root = chord[:2] else: chord_root = chord[0] - + if chord_root in note_to_semitone: # Calculate interval from key root chord_semitone = note_to_semitone[chord_root] interval = (chord_semitone - key_root) % 12 - + # Get roman numeral roman = mapping.get(interval, '?') - + # Add minor indication if chord is minor if 'm' in chord and mode == 'major': roman = roman.lower() elif 'm' not in chord and mode == 'minor' and roman != '?': roman = roman.upper() - + roman_numerals.append(roman) else: roman_numerals.append('?') - + return roman_numerals - - def detect_modulations(self, chords: List[Tuple[str, float, int, int]], - original_key: str, sr: int) -> List[Dict[str, Any]]: + + def detect_modulations( + self, chords: List[Tuple[str, float, int, int]], original_key: str, sr: int + ) -> List[Dict[str, Any]]: """Detect key modulations in the progression. 
- + Args: chords: List of detected chords with timing original_key: Original key of the song sr: Sample rate - + Returns: List of detected modulations """ modulations = [] - + if len(chords) < 8: # Need sufficient chords to detect modulation return modulations - + # Analyze chord progressions in windows window_size = 8 # Analyze 8 chords at a time - + for i in range(0, len(chords) - window_size + 1, window_size // 2): - window_chords = chords[i:i + window_size] + window_chords = chords[i : i + window_size] chord_names = [c[0] for c in window_chords if c[0] != 'N'] - + if len(chord_names) < 4: continue - + # Try to detect key of this window detected_key = self._detect_local_key(chord_names) - + if detected_key and detected_key != original_key: # Calculate time of modulation time_seconds = window_chords[0][2] * self.hop_length / sr - - modulations.append({ - 'time': time_seconds, - 'from_key': original_key, - 'to_key': detected_key, - 'confidence': 0.7 # Placeholder confidence - }) - + + modulations.append( + { + 'time': time_seconds, + 'from_key': original_key, + 'to_key': detected_key, + 'confidence': 0.7, # Placeholder confidence + } + ) + original_key = detected_key # Update for next detection - + return modulations - + def _detect_local_key(self, chord_names: List[str]) -> str: """Detect the key of a local chord progression. - + Args: chord_names: List of chord names - + Returns: Detected key or None """ # Simple key detection based on chord frequency # This is a simplified approach - could be improved with more sophisticated algorithms - + major_keys = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] - minor_keys = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] - + best_key = None best_score = 0 - + # Test each possible key for key_root in major_keys: # Test major key @@ -533,82 +614,90 @@ def _detect_local_key(self, chord_names: List[str]) -> str: if score > best_score: best_score = score best_key = f"{key_root} Major" - + # Test minor key score = self._score_key_fit(chord_names, key_root, 'minor') if score > best_score: best_score = score best_key = f"{key_root} Minor" - + return best_key if best_score > 0.5 else None - + def _score_key_fit(self, chord_names: List[str], key_root: str, mode: str) -> float: """Score how well chords fit a given key. - + Args: chord_names: List of chord names key_root: Root note of the key mode: 'major' or 'minor' - + Returns: Fit score (0-1) """ if not chord_names: return 0.0 - + # Define scale degrees for major and minor keys if mode == 'major': scale_chords = ['I', 'ii', 'iii', 'IV', 'V', 'vi', 'vii°'] else: scale_chords = ['i', 'ii°', 'III', 'iv', 'V', 'VI', 'VII'] - + # Convert chords to roman numerals for this key - roman_numerals = self.functional_analysis(chord_names, f"{key_root} {mode.title()}") - + roman_numerals = self.functional_analysis( + chord_names, f"{key_root} {mode.title()}" + ) + # Count how many chords fit the key fitting_chords = 0 for roman in roman_numerals: - if roman in scale_chords or roman.lower() in [c.lower() for c in scale_chords]: + if roman in scale_chords or roman.lower() in [ + c.lower() for c in scale_chords + ]: fitting_chords += 1 - + return fitting_chords / len(chord_names) if chord_names else 0.0 - - def analyze(self, y: np.ndarray, sr: int, key: str = None, bpm: float = 130.0) -> Dict[str, Any]: + + def analyze( + self, y: np.ndarray, sr: int, key: str = None, bpm: float = 130.0 + ) -> Dict[str, Any]: """Perform complete chord progression analysis. 
- + Args: y: Audio signal sr: Sample rate key: Optional key information bpm: BPM for dynamic window calculation - + Returns: Complete chord analysis results """ # Extract chroma features chroma = self.extract_chroma_features(y, sr) - + # Detect chords with dynamic window sizing chords = self.detect_chords(chroma, bpm) - + # Analyze progression progression_analysis = self.analyze_progression(chords) - + # Functional analysis if key is provided functional_analysis = [] if key and chords: chord_names = [c[0] for c in chords if c[0] != 'N'] functional_analysis = self.functional_analysis(chord_names, key) - + # Detect modulations modulations = [] if key and len(chords) > 8: modulations = self.detect_modulations(chords, key, sr) - + # Calculate substitute chord ratio chord_names = [c[0] for c in chords if c[0] != 'N'] - substitute_ratio = self._calculate_substitute_ratio(chord_names, key) if key else 0.0 - + substitute_ratio = ( + self._calculate_substitute_ratio(chord_names, key) if key else 0.0 + ) + return { 'chords': chords, 'main_progression': progression_analysis['main_progression'], @@ -619,28 +708,31 @@ def analyze(self, y: np.ndarray, sr: int, key: str = None, bpm: float = 130.0) - 'chord_changes': progression_analysis['chord_changes'], 'functional_analysis': functional_analysis, 'modulations': modulations, - 'substitute_chords_ratio': substitute_ratio + 'substitute_chords_ratio': substitute_ratio, } - + def _calculate_substitute_ratio(self, chord_names: List[str], key: str) -> float: """Calculate the ratio of substitute/extended chords. - + Args: chord_names: List of chord names key: Key of the song - + Returns: Ratio of substitute chords (0-1) """ if not chord_names or not key: return 0.0 - + # Simple heuristic: count chords with extensions or alterations substitute_count = 0 - + for chord in chord_names: # Look for chord extensions/alterations - if any(ext in chord for ext in ['7', '9', '11', '13', 'sus', 'add', 'dim', 'aug']): + if any( + ext in chord + for ext in ['7', '9', '11', '13', 'sus', 'add', 'dim', 'aug'] + ): substitute_count += 1 - - return substitute_count / len(chord_names) if chord_names else 0.0 \ No newline at end of file + + return substitute_count / len(chord_names) if chord_names else 0.0 diff --git a/src/bpm_detector/cli.py b/src/bpm_detector/cli.py index 454c058..d9d84fc 100644 --- a/src/bpm_detector/cli.py +++ b/src/bpm_detector/cli.py @@ -1,28 +1,25 @@ """Command-line interface for BPM and Key detector.""" import argparse -import math import os import signal import sys import warnings import psutil -from typing import Optional from multiprocessing import cpu_count import soundfile as sf from colorama import Fore, Style, init from tqdm import tqdm +from .music_analyzer import HOP_DEFAULT, SR_DEFAULT, AudioAnalyzer +from .parallel_analyzer import SmartParallelAudioAnalyzer +from .auto_parallel import AutoParallelConfig +from .progress_manager import create_progress_display # Suppress sklearn warnings warnings.filterwarnings("ignore", category=RuntimeWarning, module="sklearn") warnings.filterwarnings("ignore", category=UserWarning, module="librosa") -from .music_analyzer import BIN_WIDTH, HOP_DEFAULT, SR_DEFAULT, AudioAnalyzer -from .parallel_analyzer import SmartParallelAudioAnalyzer -from .auto_parallel import AutoParallelConfig -from .progress_manager import create_progress_display - # Initialize colorama for cross-platform colored output init(autoreset=True) @@ -35,9 +32,9 @@ def signal_handler(signum, frame): """Handle Ctrl+C gracefully.""" global 
interrupted interrupted = True - + print(f"\n{Fore.YELLOW}⚠️ Interrupted by user. Cleaning up...{Style.RESET_ALL}") - + # Stop system monitor if analyzer exists if current_analyzer: try: @@ -47,7 +44,7 @@ def signal_handler(signum, frame): current_analyzer.system_monitor.stop_monitoring() except Exception: pass - + print(f"{Fore.GREEN}✅ Cleanup completed. Exiting gracefully.{Style.RESET_ALL}") sys.exit(0) @@ -58,7 +55,6 @@ def signal_handler(signum, frame): def progress_bar(total_frames: int, sr: int) -> tqdm: """Create a progress bar for audio processing.""" - total_seconds = total_frames / sr bar = tqdm(total=100, bar_format="{l_bar}{bar}| {n_fmt}%") # simplistic four-phase: 0-25 load, 25-50 BPM, 50-75 key, 75-100 finalize bar.update(0) @@ -68,11 +64,17 @@ def progress_bar(total_frames: int, sr: int) -> tqdm: def show_system_info(): """Display system information and parallel configuration.""" config = AutoParallelConfig.get_optimal_config() - + print(f"{Fore.CYAN}🖥️ System Information{Style.RESET_ALL}") - print(f" CPU Cores: {cpu_count()} logical, {psutil.cpu_count(logical=False)} physical") - print(f" Memory: {psutil.virtual_memory().total / (1024**3):.1f} GB total, {psutil.virtual_memory().available / (1024**3):.1f} GB available") - print(f" Current Load: CPU {psutil.cpu_percent()}%, Memory {psutil.virtual_memory().percent}%") + print( + f" CPU Cores: {cpu_count()} logical, {psutil.cpu_count(logical=False)} physical" + ) + print( + f" Memory: {psutil.virtual_memory().total / (1024**3):.1f} GB total, {psutil.virtual_memory().available / (1024**3):.1f} GB available" + ) + print( + f" Current Load: CPU {psutil.cpu_percent()}%, Memory {psutil.virtual_memory().percent}%" + ) print() print(f"{Fore.GREEN}🚀 Parallel Configuration{Style.RESET_ALL}") print(f" Parallel Enabled: {config.enable_parallel}") @@ -83,7 +85,9 @@ def show_system_info(): print(f" Reason: {config.reason}") -def print_results(results: dict, detect_key: bool = False, comprehensive: bool = False) -> None: +def print_results( + results: dict, detect_key: bool = False, comprehensive: bool = False +) -> None: """Print analysis results with colored output.""" # Handle new result format with basic_info if "basic_info" in results: @@ -91,40 +95,46 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = filename = os.path.basename(basic_info["filename"]) bpm = basic_info["bpm"] bpm_conf = basic_info["bpm_confidence"] - candidates = basic_info["bpm_candidates"] else: # Fallback to old format filename = os.path.basename(results["filename"]) bpm = results["bpm"] bpm_conf = results["bpm_confidence"] - candidates = results["bpm_candidates"] print(f"\n{Fore.CYAN}{Style.BRIGHT}{filename}{Style.RESET_ALL}") - + if comprehensive and "basic_info" in results: # Show comprehensive summary first duration = basic_info.get("duration", 0) - print(f" {Fore.YELLOW}> Duration: {duration:.1f}s, BPM: {bpm:.1f}, Key: {basic_info.get('key', 'Unknown')}{Style.RESET_ALL}") - + print( + f" {Fore.YELLOW}> Duration: {duration:.1f}s, BPM: {bpm:.1f}, Key: {basic_info.get('key', 'Unknown')}{Style.RESET_ALL}" + ) + # Show additional analysis if available if "chord_progression" in results: chords = results["chord_progression"] main_prog = chords.get("main_progression", []) if main_prog: - print(f" {Fore.BLUE}> Chord Progression: {' → '.join(main_prog[:4])}{Style.RESET_ALL}") - + print( + f" {Fore.BLUE}> Chord Progression: {' → '.join(main_prog[:4])}{Style.RESET_ALL}" + ) + if "structure" in results: structure = results["structure"] form 
= structure.get("form", "Unknown") section_count = structure.get("section_count", 0) sections_list = structure.get("sections", []) - - print(f" {Fore.BLUE}> Structure: {form} ({section_count} sections){Style.RESET_ALL}") - + + print( + f" {Fore.BLUE}> Structure: {form} ({section_count} sections){Style.RESET_ALL}" + ) + # Show section details if available if sections_list: - print(f" {Fore.BLUE}> Section Details ({len(sections_list)} sections):{Style.RESET_ALL}") - + print( + f" {Fore.BLUE}> Section Details ({len(sections_list)} sections):{Style.RESET_ALL}" + ) + # Show all sections with enhanced formatting including ASCII labels for i, section in enumerate(sections_list): section_type = section.get('type', 'unknown') @@ -132,19 +142,19 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = start_time = section.get('start_time', 0) duration = section.get('duration', 0) chord_prog = section.get('chord_progression', 'Unknown') - + # Format time as mm:ss start_mm = int(start_time // 60) start_ss = int(start_time % 60) start_time_str = f"{start_mm:02d}:{start_ss:02d}" - + # Calculate bars (1 bar ≈ 1.842s @130.5 BPM) bars = round(duration / (4 * 60.0 / bpm)) - + # Get features and symbolize them energy_level = section.get('energy_level', 0.5) complexity = section.get('complexity', 0.5) - + # Symbolize energy: low/mid/high if energy_level < 0.33: energy_symbol = "low E" @@ -152,7 +162,7 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = energy_symbol = "mid E" else: energy_symbol = "high E" - + # Symbolize complexity: low/mid/high if complexity < 0.33: complexity_symbol = "low C" @@ -160,80 +170,110 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = complexity_symbol = "mid C" else: complexity_symbol = "high C" - + features = f"{energy_symbol}, {complexity_symbol}" - + # Display section with ASCII label - section_display = f"{section_type.title()}({ascii_label})" if ascii_label != section_type else section_type.title() - + section_display = ( + f"{section_type.title()}({ascii_label})" + if ascii_label != section_type + else section_type.title() + ) + if chord_prog != 'Unknown': - print(f" {i+1}. {section_display} ({start_time_str}, {bars}bars, {features}): {chord_prog}") + print( + f" {i+1}. {section_display} ({start_time_str}, {bars}bars, {features}): {chord_prog}" + ) else: - print(f" {i+1}. {section_display} ({start_time_str}, {bars}bars, {features})") - + print( + f" {i+1}. 
{section_display} ({start_time_str}, {bars}bars, {features})" + ) + if "rhythm" in results: rhythm = results["rhythm"] time_sig = rhythm.get("time_signature", "4/4") groove = rhythm.get("groove_type", "straight") - print(f" {Fore.BLUE}> Rhythm: {time_sig} time, {groove} groove{Style.RESET_ALL}") - + print( + f" {Fore.BLUE}> Rhythm: {time_sig} time, {groove} groove{Style.RESET_ALL}" + ) + # Show additional detailed analysis if "timbre" in results: timbre = results["timbre"] instruments = timbre.get("dominant_instruments", []) if instruments: - inst_names = [inst.get("instrument", "unknown") for inst in instruments[:3]] - print(f" {Fore.MAGENTA}> Instruments: {', '.join(inst_names)}{Style.RESET_ALL}") - + inst_names = [ + inst.get("instrument", "unknown") for inst in instruments[:3] + ] + print( + f" {Fore.MAGENTA}> Instruments: {', '.join(inst_names)}{Style.RESET_ALL}" + ) + # Show timbral characteristics brightness = timbre.get("brightness", 0) warmth = timbre.get("warmth", 0) - print(f" {Fore.MAGENTA}> Timbre: Brightness {brightness:.1f}, Warmth {warmth:.1f}{Style.RESET_ALL}") - + print( + f" {Fore.MAGENTA}> Timbre: Brightness {brightness:.1f}, Warmth {warmth:.1f}{Style.RESET_ALL}" + ) + if "melody_harmony" in results: melody = results["melody_harmony"] if melody.get("melody_present", False): coverage = melody.get("melody_coverage", 0) range_info = melody.get("melodic_range", {}) range_oct = range_info.get("range_octaves", 0) - + # Show full melodic range (including instruments) full_lowest = range_info.get("lowest_note_name", "Unknown") full_highest = range_info.get("highest_note_name", "Unknown") full_category = range_info.get("vocal_range_category", "Unknown") - + # Show vocal-only range vocal_lowest = range_info.get("vocal_lowest_note_name", "Unknown") vocal_highest = range_info.get("vocal_highest_note_name", "Unknown") vocal_category = range_info.get("vocal_range_category", "Unknown") - - print(f" {Fore.CYAN}> Melody: {coverage:.1%} coverage, {range_oct:.1f} octave range{Style.RESET_ALL}") - print(f" {Fore.CYAN}> Full Range: {full_lowest} - {full_highest} ({full_category}){Style.RESET_ALL}") - + + print( + f" {Fore.CYAN}> Melody: {coverage:.1%} coverage, {range_oct:.1f} octave range{Style.RESET_ALL}" + ) + print( + f" {Fore.CYAN}> Full Range: {full_lowest} - {full_highest} ({full_category}){Style.RESET_ALL}" + ) + if vocal_lowest != "No Vocal Detected": - print(f" {Fore.GREEN}> Vocal Range: {vocal_lowest} - {vocal_highest} ({vocal_category}){Style.RESET_ALL}") + print( + f" {Fore.GREEN}> Vocal Range: {vocal_lowest} - {vocal_highest} ({vocal_category}){Style.RESET_ALL}" + ) else: - print(f" {Fore.YELLOW}> Vocal Range: No clear vocal melody detected{Style.RESET_ALL}") - + print( + f" {Fore.YELLOW}> Vocal Range: No clear vocal melody detected{Style.RESET_ALL}" + ) + consonance = melody.get("consonance", {}).get("consonance_level", 0) - complexity = melody.get("harmony_complexity", {}).get("harmonic_complexity", 0) - print(f" {Fore.CYAN}> Harmony: {consonance:.1%} consonance, {complexity:.1%} complexity{Style.RESET_ALL}") - + complexity = melody.get("harmony_complexity", {}).get( + "harmonic_complexity", 0 + ) + print( + f" {Fore.CYAN}> Harmony: {consonance:.1%} consonance, {complexity:.1%} complexity{Style.RESET_ALL}" + ) + if "dynamics" in results: dynamics = results["dynamics"] # Get dynamic range from the nested structure dynamic_range = dynamics.get("dynamic_range", {}) range_db = dynamic_range.get("dynamic_range_db", 0) - + # Calculate variation from energy variance energy_variance 
= dynamics.get("energy_variance", 0) # Convert variance to percentage (rough approximation) variation = min(1.0, energy_variance * 100) if energy_variance > 0 else 0 - - print(f" {Fore.YELLOW}> Dynamics: {range_db:.1f}dB range, {variation:.1%} variation{Style.RESET_ALL}") - + + print( + f" {Fore.YELLOW}> Dynamics: {range_db:.1f}dB range, {variation:.1%} variation{Style.RESET_ALL}" + ) + print() # Extra line for comprehensive results - + # Only show the final BPM result, not the detailed candidates print( f" {Fore.GREEN}{Style.BRIGHT}> Estimated BPM : {bpm:.2f} BPM (conf {bpm_conf:.1f}%){Style.RESET_ALL}" @@ -247,7 +287,7 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = else: key = results.get("key") key_conf = results.get("key_confidence", 0.0) - + if key: print( f" {Fore.MAGENTA}{Style.BRIGHT}> Estimated Key : {key} (conf {key_conf:.1f}%){Style.RESET_ALL}" @@ -256,23 +296,31 @@ def print_results(results: dict, detect_key: bool = False, comprehensive: bool = print() -def print_multiple_results(results: dict, detect_key: bool = False, comprehensive: bool = False) -> None: +def print_multiple_results( + results: dict, detect_key: bool = False, comprehensive: bool = False +) -> None: """Print results for multiple files.""" print(f"\n{Fore.CYAN}{Style.BRIGHT}Analysis Results Summary{Style.RESET_ALL}") print(f"Processed {len(results)} files\n") - + for filepath, file_results in results.items(): if isinstance(file_results, dict) and "error" in file_results: - print(f"{Fore.RED}❌ {os.path.basename(filepath)}: {file_results['error']}{Style.RESET_ALL}") + print( + f"{Fore.RED}❌ {os.path.basename(filepath)}: {file_results['error']}{Style.RESET_ALL}" + ) else: print_results(file_results, detect_key, comprehensive) def analyze_file_with_progress(path: str, analyzer, args: argparse.Namespace) -> None: """Analyze a single audio file with progress display.""" - + # Create progress callback for smart analyzer - if hasattr(analyzer, '_parallel_config') and analyzer._parallel_config and analyzer._parallel_config.enable_parallel: + if ( + hasattr(analyzer, '_parallel_config') + and analyzer._parallel_config + and analyzer._parallel_config.enable_parallel + ): # Use detailed progress display for parallel analyzer progress_display = None if args.progress: @@ -281,12 +329,16 @@ def analyze_file_with_progress(path: str, analyzer, args: argparse.Namespace) -> def smart_progress_callback(progress: float, message: str = ""): if interrupted: raise KeyboardInterrupt("Analysis interrupted by user") - + # Fallback progress display if no detailed display if not progress_display and args.progress: clean_message = message.replace('\n', ' ').replace('\r', ' ') - print(f"\r{' ' * 80}\r{Fore.BLUE}Progress: {progress:.1f}% - {clean_message}{Style.RESET_ALL}", end="", flush=True) - + print( + f"\r{' ' * 80}\r{Fore.BLUE}Progress: {progress:.1f}% - {clean_message}{Style.RESET_ALL}", + end="", + flush=True, + ) + try: results = analyzer.analyze_file( path=path, @@ -295,25 +347,29 @@ def smart_progress_callback(progress: float, message: str = ""): min_bpm=args.min_bpm, max_bpm=args.max_bpm, start_bpm=args.start_bpm, - progress_callback=smart_progress_callback if args.progress and not progress_display else None, + progress_callback=( + smart_progress_callback + if args.progress and not progress_display + else None + ), progress_display=progress_display, - detailed_progress=args.detailed_progress + detailed_progress=args.detailed_progress, ) - + if progress_display: progress_display.close() elif 
args.progress: print(f"\r{' ' * 80}\r", end="") # Clear progress line - + print_results(results, args.detect_key, args.comprehensive) - + except Exception as e: if progress_display: progress_display.close() elif args.progress: print(f"\r{' ' * 80}\r", end="") # Clear progress line print(f"{Fore.RED}Error processing {path}: {e}{Style.RESET_ALL}") - + else: # Use traditional progress for regular analyzer info = sf.info(path) @@ -352,51 +408,59 @@ def progress_callback(increment: int) -> None: def main() -> None: """Main CLI entry point.""" - parser = argparse.ArgumentParser(description="Smart BPM and Key detector with parallel processing") + parser = argparse.ArgumentParser( + description="Smart BPM and Key detector with parallel processing" + ) parser.add_argument("files", nargs="*", help="Audio file paths") parser.add_argument("--sr", type=int, default=SR_DEFAULT, help="Sample rate") parser.add_argument("--hop", type=int, default=HOP_DEFAULT, help="Hop length") parser.add_argument("--min_bpm", type=float, default=40.0, help="Minimum BPM") parser.add_argument("--max_bpm", type=float, default=300.0, help="Maximum BPM") parser.add_argument("--start_bpm", type=float, default=150.0, help="Starting BPM") - parser.add_argument("--quiet", "-q", action="store_true", help="Suppress progress display") + parser.add_argument( + "--quiet", "-q", action="store_true", help="Suppress progress display" + ) parser.add_argument( "--detect-key", action="store_true", help="Enable key detection" ) parser.add_argument( - "--comprehensive", action="store_true", help="Enable comprehensive music analysis" + "--comprehensive", + action="store_true", + help="Enable comprehensive music analysis", ) - + # Parallel processing options parallel_group = parser.add_argument_group('Parallel Processing') parallel_group.add_argument( - "--auto-parallel", action="store_true", default=True, - help="Enable automatic parallel optimization (default: enabled)" + "--auto-parallel", + action="store_true", + default=True, + help="Enable automatic parallel optimization (default: enabled)", ) parallel_group.add_argument( - "--no-parallel", action="store_true", - help="Disable parallel processing" + "--no-parallel", action="store_true", help="Disable parallel processing" ) parallel_group.add_argument( - "--max-workers", type=int, default=None, - help="Override automatic worker count" + "--max-workers", type=int, default=None, help="Override automatic worker count" ) parallel_group.add_argument( - "--detailed-progress", action="store_true", - help="Show detailed progress for each analysis task" + "--detailed-progress", + action="store_true", + help="Show detailed progress for each analysis task", ) parallel_group.add_argument( - "--show-system-info", action="store_true", - help="Show system information and parallel configuration" + "--show-system-info", + action="store_true", + help="Show system information and parallel configuration", ) args = parser.parse_args() - + # Show system info and exit if requested if args.show_system_info: show_system_info() return - + # Check if files are provided if not args.files: parser.print_help() @@ -415,9 +479,9 @@ def main() -> None: auto_parallel=args.auto_parallel, max_workers=args.max_workers, sr=args.sr, - hop_length=args.hop + hop_length=args.hop, ) - + current_analyzer = analyzer # Process files @@ -427,31 +491,38 @@ def main() -> None: if not os.path.exists(filepath): print(f"{Fore.RED}File not found: {filepath}{Style.RESET_ALL}") return - + analyze_file_with_progress(filepath, analyzer, args) - + 
else: # Multiple file processing valid_files = [f for f in args.files if os.path.exists(f)] invalid_files = [f for f in args.files if not os.path.exists(f)] - + if invalid_files: - print(f"{Fore.RED}Files not found: {', '.join(invalid_files)}{Style.RESET_ALL}") - + print( + f"{Fore.RED}Files not found: {', '.join(invalid_files)}{Style.RESET_ALL}" + ) + if not valid_files: return - + # Use smart analyzer for multiple files if available if hasattr(analyzer, 'analyze_file') and hasattr(analyzer, '_parallel_config'): try: + def multi_progress_callback(progress: float, message: str = ""): if interrupted: raise KeyboardInterrupt("Analysis interrupted by user") - + if args.progress: clean_message = message.replace('\n', ' ').replace('\r', ' ') - print(f"\r{' ' * 80}\r{Fore.BLUE}Overall Progress: {progress:.1f}% - {clean_message}{Style.RESET_ALL}", end="", flush=True) - + print( + f"\r{' ' * 80}\r{Fore.BLUE}Overall Progress: {progress:.1f}% - {clean_message}{Style.RESET_ALL}", + end="", + flush=True, + ) + results = analyzer.analyze_file( path=valid_files, comprehensive=args.comprehensive, @@ -459,21 +530,25 @@ def multi_progress_callback(progress: float, message: str = ""): min_bpm=args.min_bpm, max_bpm=args.max_bpm, start_bpm=args.start_bpm, - progress_callback=multi_progress_callback if args.progress else None, - detailed_progress=args.detailed_progress + progress_callback=( + multi_progress_callback if args.progress else None + ), + detailed_progress=args.detailed_progress, ) - + if args.progress: print(f"\r{' ' * 80}\r", end="") # Clear progress line - + print_multiple_results(results, args.detect_key, args.comprehensive) - + except Exception as e: if args.progress: print(f"\r{' ' * 80}\r", end="") # Clear progress line print(f"{Fore.RED}Error in batch processing: {e}{Style.RESET_ALL}") - print(f"{Fore.YELLOW}Falling back to sequential processing...{Style.RESET_ALL}") - + print( + f"{Fore.YELLOW}Falling back to sequential processing...{Style.RESET_ALL}" + ) + # Fallback to sequential processing for filepath in valid_files: analyze_file_with_progress(filepath, analyzer, args) diff --git a/src/bpm_detector/context_analyzer.py b/src/bpm_detector/context_analyzer.py index b9bb12a..ebdc99e 100644 --- a/src/bpm_detector/context_analyzer.py +++ b/src/bpm_detector/context_analyzer.py @@ -6,35 +6,39 @@ class ContextAnalyzer: """Analyzes context and applies rules for section classification.""" - + def __init__(self): """Initialize context analyzer.""" pass - - def classify_with_relative_energy(self, characteristics: Dict[str, Any], - all_energies: List[float], current_index: int) -> str: + + def classify_with_relative_energy( + self, + characteristics: Dict[str, Any], + all_energies: List[float], + current_index: int, + ) -> str: """Classify section type based on relative energy analysis. 
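+
+        The section's energy is placed against the 25th/50th/75th/90th
+        percentiles of all section energies and combined with the local
+        energy trend ('building', 'declining' or 'stable'); for example,
+        energy at or above the 90th percentile maps to 'chorus':
+
+            p25, p50, p75, p90 = np.percentile(all_energies, [25, 50, 75, 90])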
- + Args: characteristics: Audio characteristics all_energies: List of all energy values current_index: Current section index - + Returns: Section type based on relative energy """ if not all_energies or current_index >= len(all_energies): return 'verse' - + current_energy = all_energies[current_index] - + # Calculate energy percentiles energy_percentiles = np.percentile(all_energies, [25, 50, 75, 90]) p25, p50, p75, p90 = energy_percentiles - + # Analyze energy trend trend = self._analyze_energy_trend(current_index, all_energies) - + # Classification based on relative energy and trend if current_energy >= p90: # Very high energy - likely chorus @@ -61,29 +65,39 @@ def classify_with_relative_energy(self, characteristics: Dict[str, Any], return 'outro' else: return 'verse' - - def _analyze_energy_trend(self, current_index: int, all_energies: List[float]) -> str: + + def _analyze_energy_trend( + self, current_index: int, all_energies: List[float] + ) -> str: """Analyze energy trend around current position. - + Args: current_index: Current section index all_energies: List of all energy values - + Returns: Energy trend: 'building', 'declining', or 'stable' """ if len(all_energies) < 3 or current_index == 0: return 'stable' - + # Look at previous and next sections - prev_energy = all_energies[current_index - 1] if current_index > 0 else all_energies[current_index] + prev_energy = ( + all_energies[current_index - 1] + if current_index > 0 + else all_energies[current_index] + ) current_energy = all_energies[current_index] - next_energy = all_energies[current_index + 1] if current_index < len(all_energies) - 1 else current_energy - + next_energy = ( + all_energies[current_index + 1] + if current_index < len(all_energies) - 1 + else current_energy + ) + # Calculate trends prev_trend = current_energy - prev_energy next_trend = next_energy - current_energy - + # Determine overall trend if prev_trend > 0.1 and next_trend > 0.1: return 'building' @@ -95,19 +109,22 @@ def _analyze_energy_trend(self, current_index: int, all_energies: List[float]) - return 'declining' else: return 'stable' - - def apply_rb_pairing_and_verse_recovery(self, base_type: str, - characteristics: Dict[str, Any], - previous_sections: List[Dict[str, Any]], - next_sections: List[Dict[str, Any]]) -> str: + + def apply_rb_pairing_and_verse_recovery( + self, + base_type: str, + characteristics: Dict[str, Any], + previous_sections: List[Dict[str, Any]], + next_sections: List[Dict[str, Any]], + ) -> str: """Apply R-B pairing rules and verse recovery. 
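+
+        Two heuristics are applied: a 'pre_chorus' is kept only when the
+        following section is markedly louder (next energy > 1.3x current),
+        and an isolated high-energy 'chorus' flanked by much quieter
+        neighbours (both < 0.7x its energy) is demoted to 'verse' when its
+        spectral complexity is low.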
- + Args: base_type: Base classification characteristics: Current section characteristics previous_sections: Previous sections for context next_sections: Next sections for context - + Returns: Refined section type """ @@ -118,48 +135,55 @@ def apply_rb_pairing_and_verse_recovery(self, base_type: str, next_section = next_sections[0] next_energy = next_section.get('energy', 0.0) current_energy = characteristics.get('energy', 0.0) - + # If next section has significantly higher energy, keep as pre_chorus if next_energy > current_energy * 1.3: return 'pre_chorus' else: # No clear chorus following, might be verse return 'verse' - + # Verse recovery: isolated high-energy sections might be verses elif base_type == 'chorus': current_energy = characteristics.get('energy', 0.0) - + # Check context prev_energy = 0.0 next_energy = 0.0 - + if previous_sections: prev_energy = previous_sections[-1].get('energy', 0.0) if next_sections: next_energy = next_sections[0].get('energy', 0.0) - + # If isolated high energy (not part of a high-energy sequence) - if (current_energy > 0.6 and - prev_energy < current_energy * 0.7 and - next_energy < current_energy * 0.7): + if ( + current_energy > 0.6 + and prev_energy < current_energy * 0.7 + and next_energy < current_energy * 0.7 + ): # Might be a verse with high energy complexity = characteristics.get('spectral_complexity', 0.0) if complexity < 0.6: # Lower complexity suggests verse return 'verse' - + return base_type - - def apply_context_rules(self, base_type: str, previous_sections: List[Dict[str, Any]], - characteristics: Dict[str, Any], section_index: int) -> str: + + def apply_context_rules( + self, + base_type: str, + previous_sections: List[Dict[str, Any]], + characteristics: Dict[str, Any], + section_index: int, + ) -> str: """Apply context-based rules for section classification. 
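+
+        Four ordered rules: (1) a low-energy opening section becomes
+        'intro'; (2) energy building out of the previous section upgrades
+        'verse'/'bridge' to 'pre_chorus'; (3) consecutive sections of the
+        same type are resolved (e.g. back-to-back 'pre_chorus'); (4) a
+        'bridge' lacking spectral complexity or harmonic content is demoted
+        to 'verse'.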
- + Args: base_type: Base section type previous_sections: List of previous sections characteristics: Current section characteristics section_index: Index of current section - + Returns: Refined section type """ @@ -168,19 +192,21 @@ def apply_context_rules(self, base_type: str, previous_sections: List[Dict[str, energy = characteristics.get('energy', 0.0) if energy < 0.4: # Low energy start return 'intro' - + # Rule 2: Energy building detection if len(previous_sections) > 0: prev_section = previous_sections[-1] if self._is_energy_building_enhanced(prev_section, characteristics): if base_type in ['verse', 'bridge']: return 'pre_chorus' - + # Rule 3: Consecutive section resolution if len(previous_sections) > 0: prev_type = previous_sections[-1].get('type', '') - resolved_type = self._resolve_consecutive_sections(base_type, characteristics) - + resolved_type = self._resolve_consecutive_sections( + base_type, characteristics + ) + # Avoid consecutive pre_chorus if prev_type == 'pre_chorus' and resolved_type == 'pre_chorus': # Check if this should be chorus instead @@ -189,55 +215,54 @@ def apply_context_rules(self, base_type: str, previous_sections: List[Dict[str, return 'chorus' else: return 'verse' - + # Rule 4: Bridge detection (requires specific characteristics) if base_type == 'bridge': complexity = characteristics.get('spectral_complexity', 0.0) harmonic_content = characteristics.get('harmonic_content', 0.0) - + # Bridge should have higher complexity and different harmonic content if complexity < 0.6 or harmonic_content < 0.3: return 'verse' # Downgrade to verse - + return base_type - - def _is_energy_building_enhanced(self, prev_section: Dict[str, Any], - current_characteristics: Dict[str, Any]) -> bool: + + def _is_energy_building_enhanced( + self, prev_section: Dict[str, Any], current_characteristics: Dict[str, Any] + ) -> bool: """Enhanced energy building detection. - + Args: prev_section: Previous section current_characteristics: Current section characteristics - + Returns: True if energy is building """ prev_energy = prev_section.get('energy', 0.0) current_energy = current_characteristics.get('energy', 0.0) - + # Basic energy increase (more lenient threshold) energy_increase = current_energy > prev_energy * 1.1 - + # Additional factors prev_complexity = prev_section.get('spectral_complexity', 0.0) current_complexity = current_characteristics.get('spectral_complexity', 0.0) complexity_increase = current_complexity > prev_complexity * 1.05 - - prev_rhythm = prev_section.get('rhythmic_density', 0.0) - current_rhythm = current_characteristics.get('rhythmic_density', 0.0) - rhythm_increase = current_rhythm > prev_rhythm * 1.05 - + # Building if energy increases significantly OR both energy and complexity increase significant_energy_increase = current_energy > prev_energy * 1.5 return significant_energy_increase or (energy_increase and complexity_increase) - - def _resolve_consecutive_sections(self, section_type: str, characteristics: Dict[str, Any]) -> str: + + def _resolve_consecutive_sections( + self, section_type: str, characteristics: Dict[str, Any] + ) -> str: """Resolve consecutive sections of the same type. 
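+
+        Repeated 'verse' sections with high energy (> 0.5) and spectral
+        complexity (> 0.6) are promoted to 'pre_chorus'; repeated 'chorus'
+        sections with low harmonic content (< 0.4) but high complexity
+        (> 0.7) are reinterpreted as 'bridge'.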
- + Args: section_type: Current section type characteristics: Section characteristics - + Returns: Resolved section type """ @@ -245,40 +270,43 @@ def _resolve_consecutive_sections(self, section_type: str, characteristics: Dict if section_type == 'verse': energy = characteristics.get('energy', 0.0) complexity = characteristics.get('spectral_complexity', 0.0) - + # Higher energy and complexity might indicate pre_chorus if energy > 0.5 and complexity > 0.6: return 'pre_chorus' - + # For consecutive chorus, check if one should be bridge elif section_type == 'chorus': harmonic_content = characteristics.get('harmonic_content', 0.0) complexity = characteristics.get('spectral_complexity', 0.0) - + # Different harmonic content might indicate bridge if harmonic_content < 0.4 and complexity > 0.7: return 'bridge' - + return section_type - - def detect_verse_repetition(self, similarity_matrix: np.ndarray, - sections: List[Dict[str, Any]], - threshold: float = 0.8) -> List[int]: + + def detect_verse_repetition( + self, + similarity_matrix: np.ndarray, + sections: List[Dict[str, Any]], + threshold: float = 0.8, + ) -> List[int]: """Detect verse repetition patterns. - + Args: similarity_matrix: Similarity matrix between sections sections: List of sections threshold: Similarity threshold for repetition - + Returns: List of indices of repeated verses """ repeated_indices = [] - + if similarity_matrix.shape[0] != len(sections): return repeated_indices - + # Find pairs of similar sections for i in range(len(sections)): for j in range(i + 1, len(sections)): @@ -286,60 +314,71 @@ def detect_verse_repetition(self, similarity_matrix: np.ndarray, # Check if both are classified as verse or similar type_i = sections[i].get('type', '') type_j = sections[j].get('type', '') - - if type_i in ['verse', 'pre_chorus'] and type_j in ['verse', 'pre_chorus']: + + if type_i in ['verse', 'pre_chorus'] and type_j in [ + 'verse', + 'pre_chorus', + ]: # Mark as repeated verses if i not in repeated_indices: repeated_indices.append(i) if j not in repeated_indices: repeated_indices.append(j) - + return sorted(repeated_indices) - - def classify_section_type_with_enhanced_context(self, characteristics: Dict[str, Any], - previous_sections: List[Dict[str, Any]], - next_sections: List[Dict[str, Any]], - all_energies: List[float], - current_index: int) -> str: + + def classify_section_type_with_enhanced_context( + self, + characteristics: Dict[str, Any], + previous_sections: List[Dict[str, Any]], + next_sections: List[Dict[str, Any]], + all_energies: List[float], + current_index: int, + ) -> str: """Enhanced section type classification with full context. 
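+
+        This is the composition of the three passes defined above:
+
+            base    = self.classify_with_relative_energy(...)
+            refined = self.apply_rb_pairing_and_verse_recovery(base, ...)
+            final   = self.apply_context_rules(refined, ...)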
- + Args: characteristics: Current section characteristics previous_sections: Previous sections for context next_sections: Next sections for context all_energies: All energy values for relative analysis current_index: Current section index - + Returns: Classified section type """ # Start with relative energy classification - base_type = self.classify_with_relative_energy(characteristics, all_energies, current_index) - + base_type = self.classify_with_relative_energy( + characteristics, all_energies, current_index + ) + # Apply R-B pairing and verse recovery refined_type = self.apply_rb_pairing_and_verse_recovery( base_type, characteristics, previous_sections, next_sections ) - + # Apply context rules final_type = self.apply_context_rules( refined_type, previous_sections, characteristics, current_index ) - + return final_type - - def classify_section_type_with_context(self, characteristics: Dict[str, Any], - previous_sections: List[Dict[str, Any]], - similarity_matrix: np.ndarray = None, - section_index: int = 0) -> str: + + def classify_section_type_with_context( + self, + characteristics: Dict[str, Any], + previous_sections: List[Dict[str, Any]], + similarity_matrix: np.ndarray = None, + section_index: int = 0, + ) -> str: """Classify section type with context analysis. - + Args: characteristics: Section characteristics previous_sections: Previous sections for context similarity_matrix: Similarity matrix (optional) section_index: Current section index - + Returns: Section type """ @@ -347,8 +386,7 @@ def classify_section_type_with_context(self, characteristics: Dict[str, Any], energy = characteristics.get('energy', 0.0) complexity = characteristics.get('spectral_complexity', 0.0) harmonic_content = characteristics.get('harmonic_content', 0.0) - rhythmic_density = characteristics.get('rhythmic_density', 0.0) - + # Initial classification if section_index == 0: # First section @@ -363,7 +401,10 @@ def classify_section_type_with_context(self, characteristics: Dict[str, Any], if energy > 0.7 and complexity > 0.6: base_type = 'chorus' elif energy > 0.5 and complexity > 0.5: - if len(previous_sections) > 0 and previous_sections[-1].get('type') == 'verse': + if ( + len(previous_sections) > 0 + and previous_sections[-1].get('type') == 'verse' + ): base_type = 'pre_chorus' else: base_type = 'verse' @@ -373,8 +414,10 @@ def classify_section_type_with_context(self, characteristics: Dict[str, Any], base_type = 'outro' else: base_type = 'verse' - + # Apply context rules - final_type = self.apply_context_rules(base_type, previous_sections, characteristics, section_index) - - return final_type \ No newline at end of file + final_type = self.apply_context_rules( + base_type, previous_sections, characteristics, section_index + ) + + return final_type diff --git a/src/bpm_detector/dynamics_analyzer.py b/src/bpm_detector/dynamics_analyzer.py index 4739772..5cef149 100644 --- a/src/bpm_detector/dynamics_analyzer.py +++ b/src/bpm_detector/dynamics_analyzer.py @@ -2,89 +2,95 @@ import numpy as np import librosa -from typing import List, Tuple, Dict, Any +from typing import List, Dict, Any from scipy.signal import find_peaks, savgol_filter from scipy.stats import entropy class DynamicsAnalyzer: """Analyzes dynamics, energy, and loudness characteristics.""" - + def __init__(self, hop_length: int = 512, frame_size: int = 2048): """Initialize dynamics analyzer. 
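+
+        Typical usage is a single call to analyze().
+
+        Example (a minimal sketch with the default parameters; the file name
+        is a placeholder):
+
+            analyzer = DynamicsAnalyzer(hop_length=512, frame_size=2048)
+            y, sr = librosa.load('song.wav', sr=22050)
+            report = analyzer.analyze(y, sr)
+            print(report['dynamic_range']['dynamic_range_db'])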
- + Args: hop_length: Hop length for analysis frame_size: Frame size for analysis """ self.hop_length = hop_length self.frame_size = frame_size - + def extract_energy_features(self, y: np.ndarray, sr: int) -> Dict[str, np.ndarray]: """Extract energy-related features. - + Args: y: Audio signal sr: Sample rate - + Returns: Dictionary of energy features """ features = {} - + # RMS energy features['rms'] = librosa.feature.rms( y=y, hop_length=self.hop_length, frame_length=self.frame_size )[0] - + # Spectral energy stft = librosa.stft(y, hop_length=self.hop_length, n_fft=self.frame_size) - features['spectral_energy'] = np.sum(np.abs(stft)**2, axis=0) - + features['spectral_energy'] = np.sum(np.abs(stft) ** 2, axis=0) + # Energy in different frequency bands freqs = librosa.fft_frequencies(sr=sr, n_fft=self.frame_size) - + # Low frequency energy (20-250 Hz) low_freq_mask = (freqs >= 20) & (freqs <= 250) - features['low_freq_energy'] = np.sum(np.abs(stft[low_freq_mask, :])**2, axis=0) - + features['low_freq_energy'] = np.sum( + np.abs(stft[low_freq_mask, :]) ** 2, axis=0 + ) + # Mid frequency energy (250-4000 Hz) mid_freq_mask = (freqs >= 250) & (freqs <= 4000) - features['mid_freq_energy'] = np.sum(np.abs(stft[mid_freq_mask, :])**2, axis=0) - + features['mid_freq_energy'] = np.sum( + np.abs(stft[mid_freq_mask, :]) ** 2, axis=0 + ) + # High frequency energy (4000+ Hz) high_freq_mask = freqs >= 4000 - features['high_freq_energy'] = np.sum(np.abs(stft[high_freq_mask, :])**2, axis=0) - + features['high_freq_energy'] = np.sum( + np.abs(stft[high_freq_mask, :]) ** 2, axis=0 + ) + # Onset strength features['onset_strength'] = librosa.onset.onset_strength( y=y, sr=sr, hop_length=self.hop_length ) - + # Zero crossing rate (related to energy distribution) features['zcr'] = librosa.feature.zero_crossing_rate( y, hop_length=self.hop_length )[0] features['zero_crossing_rate'] = features['zcr'] # Field name expected by tests - + # Spectral centroid (expected by tests) features['spectral_centroid'] = librosa.feature.spectral_centroid( y=y, sr=sr, hop_length=self.hop_length )[0] - + # Spectral rolloff (expected by tests) features['spectral_rolloff'] = librosa.feature.spectral_rolloff( y=y, sr=sr, hop_length=self.hop_length )[0] - + return features - + def calculate_dynamic_range(self, rms: np.ndarray) -> Dict[str, float]: """Calculate dynamic range characteristics. 
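+
+        All measures derive from the RMS curve: dynamic range is max - min
+        of the RMS in dB, peak-to-average is max(rms) / mean(rms), and crest
+        factor is max(rms) over the RMS of the RMS curve itself.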
- + Args: rms: RMS energy values - + Returns: Dictionary of dynamic range measures """ @@ -93,77 +99,79 @@ def calculate_dynamic_range(self, rms: np.ndarray) -> Dict[str, float]: 'dynamic_range_db': 0.0, 'peak_to_average_ratio': 0.0, 'crest_factor': 0.0, - 'dynamic_variance': 0.0 + 'dynamic_variance': 0.0, } - + # Convert to dB rms_db = librosa.amplitude_to_db(rms + 1e-8) - + # Dynamic range (difference between max and min) dynamic_range_db = np.max(rms_db) - np.min(rms_db) - + # Peak to average ratio peak_level = np.max(rms) average_level = np.mean(rms) peak_to_average_ratio = peak_level / (average_level + 1e-8) - + # Crest factor (peak to RMS ratio) rms_of_rms = np.sqrt(np.mean(rms**2)) crest_factor = peak_level / (rms_of_rms + 1e-8) - + # Dynamic variance dynamic_variance = np.var(rms_db) - + return { 'dynamic_range_db': float(dynamic_range_db), 'peak_to_average_ratio': float(peak_to_average_ratio), - 'peak_to_average': float(peak_to_average_ratio), # Field name expected by tests + 'peak_to_average': float( + peak_to_average_ratio + ), # Field name expected by tests 'crest_factor': float(crest_factor), 'dynamic_variance': float(dynamic_variance), - 'rms_std': float(np.std(rms)) # Field name expected by tests + 'rms_std': float(np.std(rms)), # Field name expected by tests } - + def analyze_loudness(self, y: np.ndarray, sr: int) -> Dict[str, float]: """Analyze loudness characteristics. - + Args: y: Audio signal sr: Sample rate - + Returns: Dictionary of loudness measures """ # Calculate RMS for loudness estimation rms = librosa.feature.rms(y=y, hop_length=self.hop_length)[0] - + if len(rms) == 0: return { 'average_loudness_db': -60.0, 'peak_loudness_db': -60.0, 'loudness_range_db': 0.0, - 'perceived_loudness': 0.0 + 'perceived_loudness': 0.0, } - + # Convert to dB, but ensure positive values for tests rms_db = librosa.amplitude_to_db(rms + 1e-8) - + # Normalize to positive scale for test compatibility # Add offset to make values positive db_offset = 60.0 # Add 60dB to make values positive rms_db_positive = rms_db + db_offset - + # Average loudness average_loudness = np.mean(rms_db_positive) - + # Peak loudness peak_loudness = np.max(rms_db_positive) - + # Loudness range (similar to dynamic range but for loudness) loudness_range = np.max(rms_db_positive) - np.min(rms_db_positive) - + # Perceived loudness (A-weighted approximation) perceived_loudness = self._calculate_perceived_loudness(y, sr) - + return { 'average_loudness_db': float(average_loudness), 'average_loudness': float(average_loudness), # Field name expected by tests @@ -171,85 +179,88 @@ def analyze_loudness(self, y: np.ndarray, sr: int) -> Dict[str, float]: 'peak_loudness': float(peak_loudness), # Field name expected by tests 'loudness_range_db': float(loudness_range), 'loudness_range': float(loudness_range), # Field name expected by tests - 'perceived_loudness': float(perceived_loudness) + 'perceived_loudness': float(perceived_loudness), } - + def _calculate_perceived_loudness(self, y: np.ndarray, sr: int) -> float: """Calculate perceived loudness using A-weighting approximation. 
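+
+        Frequencies below 1 kHz are attenuated quadratically and those above
+        8 kHz are halved before summing spectral power; the sum is then
+        scaled by the squared peak amplitude and normalized against the
+        total (unweighted) power.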
- + Args: y: Audio signal sr: Sample rate - + Returns: Perceived loudness score (0-1) """ # Simple A-weighting approximation # This is a simplified version - full A-weighting requires more complex filtering - + # Calculate power spectral density stft = librosa.stft(y, hop_length=self.hop_length) - power_spectrum = np.mean(np.abs(stft)**2, axis=1) - freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0]*2-1) - + power_spectrum = np.mean(np.abs(stft) ** 2, axis=1) + freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0] * 2 - 1) + # Simple A-weighting approximation (emphasizes mid frequencies) a_weights = np.ones_like(freqs) - + # Reduce low frequencies low_freq_mask = freqs < 1000 - a_weights[low_freq_mask] *= (freqs[low_freq_mask] / 1000.0)**2 - + a_weights[low_freq_mask] *= (freqs[low_freq_mask] / 1000.0) ** 2 + # Reduce very high frequencies high_freq_mask = freqs > 8000 a_weights[high_freq_mask] *= 0.5 - + # Apply weighting - weighted_power = power_spectrum * a_weights[:len(power_spectrum)] - + weighted_power = power_spectrum * a_weights[: len(power_spectrum)] + # Calculate perceived loudness perceived_loudness = np.sum(weighted_power) - + # Scale based on signal amplitude for test compatibility signal_amplitude = np.max(np.abs(y)) - + # For test compatibility: louder signals should have proportionally higher perceived loudness if signal_amplitude > 0: - perceived_loudness *= (signal_amplitude ** 2) # Square for more sensitivity to amplitude changes - + perceived_loudness *= ( + signal_amplitude**2 + ) # Square for more sensitivity to amplitude changes + # Normalize to reasonable scale total_power = np.sum(power_spectrum) if total_power > 0: perceived_loudness = min(1.0, perceived_loudness / total_power * 5.0) else: perceived_loudness = 0.0 - + return max(0.01, perceived_loudness) # Ensure minimum positive value - - def generate_energy_profile(self, energy_features: Dict[str, np.ndarray], - window_size: float = 1.0) -> Dict[str, np.ndarray]: + + def generate_energy_profile( + self, energy_features: Dict[str, np.ndarray], window_size: float = 1.0 + ) -> Dict[str, np.ndarray]: """Generate energy profile over time. 
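+
+        RMS frames are averaged over windows of window_size seconds,
+        normalized to 0-1, smoothed with a 3-point moving average, and
+        differentiated; the result carries 'time_points', 'energy_curve',
+        'smoothed_energy' and 'energy_derivative'.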
- + Args: energy_features: Dictionary of energy features window_size: Window size in seconds for averaging - + Returns: Dictionary containing energy profile data """ rms = energy_features.get('rms', np.array([])) - + if len(rms) == 0: return { 'time_points': np.array([]), 'energy_curve': np.array([]), 'smoothed_energy': np.array([]), - 'energy_derivative': np.array([]) + 'energy_derivative': np.array([]), } - + # Calculate window size in frames (assume 22050 Hz sample rate if not provided) sr = 22050 # Default sample rate window_frames = max(1, int(window_size * sr / self.hop_length)) - + if window_frames >= len(rms): time_points = np.array([0.0]) energy_curve = np.array([np.mean(rms)]) @@ -264,159 +275,165 @@ def generate_energy_profile(self, energy_features: Dict[str, np.ndarray], window_energy = np.mean(rms[i:window_end]) profile.append(window_energy) time_points.append(i * self.hop_length / sr) - + energy_curve = np.array(profile) time_points = np.array(time_points) - + # Normalize to 0-1 scale if np.max(energy_curve) > 0: energy_curve = energy_curve / np.max(energy_curve) - + # Smoothed energy (simple moving average) if len(energy_curve) > 3: - smoothed_energy = np.convolve(energy_curve, np.ones(3)/3, mode='same') + smoothed_energy = np.convolve(energy_curve, np.ones(3) / 3, mode='same') else: smoothed_energy = energy_curve.copy() - + # Energy derivative energy_derivative = np.gradient(smoothed_energy) - + return { 'time_points': time_points, 'energy_curve': energy_curve, 'smoothed_energy': smoothed_energy, - 'energy_derivative': energy_derivative + 'energy_derivative': energy_derivative, } - - def detect_climax_points(self, energy_features: Dict[str, np.ndarray], - prominence_threshold: float = 0.1) -> Dict[str, Any]: + + def detect_climax_points( + self, energy_features: Dict[str, np.ndarray], prominence_threshold: float = 0.1 + ) -> Dict[str, Any]: """Detect climax points in the audio. 
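+
+        Peaks are located on a Savitzky-Golay-smoothed RMS curve, above
+        mean + std and at least five seconds apart; each peak's intensity is
+        blended with onset strength when available, and the time of the
+        strongest peak is reported as 'main_climax'.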
- + Args: energy_features: Dictionary of energy features prominence_threshold: Minimum prominence for peak detection - + Returns: Dictionary containing climax point data """ rms = energy_features.get('rms', np.array([])) onset_strength = energy_features.get('onset_strength', np.array([])) - + if len(rms) == 0: return { 'climax_times': np.array([]), 'climax_energies': np.array([]), 'main_climax': 0.0, - 'climax_count': 0 + 'climax_count': 0, } - + # Smooth the energy signal if len(rms) > 10: - smoothed_rms = savgol_filter(rms, min(11, len(rms)//2*2+1), 3) + smoothed_rms = savgol_filter(rms, min(11, len(rms) // 2 * 2 + 1), 3) else: smoothed_rms = rms - + # Find peaks in energy peak_threshold = np.mean(smoothed_rms) + np.std(smoothed_rms) sr = 22050 # Default sample rate energy_peaks, _ = find_peaks( smoothed_rms, height=peak_threshold, - distance=int(5 * sr / self.hop_length) # Minimum 5 seconds apart + distance=int(5 * sr / self.hop_length), # Minimum 5 seconds apart ) - + climax_times = [] climax_energies = [] - + for peak_idx in energy_peaks: # Calculate time time_seconds = peak_idx * self.hop_length / sr - + # Calculate intensity (normalized energy at peak) intensity = smoothed_rms[peak_idx] / (np.max(smoothed_rms) + 1e-8) - + # Add onset strength if available if len(onset_strength) > peak_idx: onset_intensity = onset_strength[peak_idx] # Combine energy and onset strength - combined_intensity = (intensity + onset_intensity / np.max(onset_strength + 1e-8)) / 2.0 + combined_intensity = ( + intensity + onset_intensity / np.max(onset_strength + 1e-8) + ) / 2.0 else: combined_intensity = intensity - + climax_times.append(time_seconds) climax_energies.append(combined_intensity) - + climax_times = np.array(climax_times) climax_energies = np.array(climax_energies) - + # Find main climax (highest energy) if len(climax_energies) > 0: main_climax_idx = np.argmax(climax_energies) main_climax = climax_times[main_climax_idx] else: main_climax = 0.0 - + return { 'climax_times': climax_times, 'climax_energies': climax_energies, 'main_climax': float(main_climax), - 'climax_count': len(climax_times) + 'climax_count': len(climax_times), } - - def analyze_tension_curve(self, energy_features: Dict[str, np.ndarray], - window_size: float = 1.0) -> Dict[str, Any]: + + def analyze_tension_curve( + self, energy_features: Dict[str, np.ndarray], window_size: float = 1.0 + ) -> Dict[str, Any]: """Analyze musical tension over time. 
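+
+        Tension is a fixed-weight blend of normalized features:
+
+            tension = 0.4 * rms + 0.3 * spectral_energy + 0.3 * high_freq
+
+        smoothed, clipped to [0, 1], and scanned for peaks and valleys with
+        scipy.signal.find_peaks.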
- + Args: energy_features: Dictionary of energy features window_size: Window size for analysis - + Returns: Dictionary containing tension analysis data """ rms = energy_features.get('rms', np.array([])) spectral_energy = energy_features.get('spectral_energy', np.array([])) high_freq_energy = energy_features.get('high_freq_energy', np.array([])) - + if len(rms) == 0: return { 'tension_curve': np.array([]), 'tension_peaks': np.array([]), 'tension_valleys': np.array([]), 'average_tension': 0.0, - 'tension_variance': 0.0 + 'tension_variance': 0.0, } - + # Normalize all features rms_norm = rms / (np.max(rms) + 1e-8) - + if len(spectral_energy) > 0: spectral_norm = spectral_energy / (np.max(spectral_energy) + 1e-8) else: spectral_norm = rms_norm - + if len(high_freq_energy) > 0: high_freq_norm = high_freq_energy / (np.max(high_freq_energy) + 1e-8) else: high_freq_norm = rms_norm - + # Combine features for tension calculation # Higher energy + higher spectral content + higher frequencies = more tension min_length = min(len(rms_norm), len(spectral_norm), len(high_freq_norm)) - + tension_curve = ( - rms_norm[:min_length] * 0.4 + - spectral_norm[:min_length] * 0.3 + - high_freq_norm[:min_length] * 0.3 + rms_norm[:min_length] * 0.4 + + spectral_norm[:min_length] * 0.3 + + high_freq_norm[:min_length] * 0.3 ) - + # Smooth the tension curve if len(tension_curve) > 10: - tension_curve = savgol_filter(tension_curve, min(11, len(tension_curve)//2*2+1), 3) - + tension_curve = savgol_filter( + tension_curve, min(11, len(tension_curve) // 2 * 2 + 1), 3 + ) + # Ensure values are in 0-1 range tension_curve = np.clip(tension_curve, 0, 1) - + # Find peaks and valleys if len(tension_curve) > 5: peaks, _ = find_peaks(tension_curve, distance=5) @@ -424,61 +441,63 @@ def analyze_tension_curve(self, energy_features: Dict[str, np.ndarray], else: peaks = np.array([]) valleys = np.array([]) - + # Calculate statistics average_tension = float(np.mean(tension_curve)) tension_variance = float(np.var(tension_curve)) - + return { 'tension_curve': tension_curve, 'tension_peaks': peaks, 'tension_valleys': valleys, 'average_tension': average_tension, - 'tension_variance': tension_variance + 'tension_variance': tension_variance, } - - def analyze_energy_distribution(self, energy_features: Dict[str, np.ndarray]) -> Dict[str, float]: + + def analyze_energy_distribution( + self, energy_features: Dict[str, np.ndarray] + ) -> Dict[str, float]: """Analyze energy distribution across frequency bands. 
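+
+        Band ratios (low 20-250 Hz, mid 250-4000 Hz, high 4 kHz and up) are
+        normalized and their entropy, divided by log(3), gives the spectral
+        balance: values near 1.0 mean energy is spread evenly across bands.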
- + Args: energy_features: Dictionary of energy features - + Returns: Dictionary of energy distribution measures """ low_freq = energy_features.get('low_freq_energy', np.array([])) mid_freq = energy_features.get('mid_freq_energy', np.array([])) high_freq = energy_features.get('high_freq_energy', np.array([])) - + if len(low_freq) == 0 or len(mid_freq) == 0 or len(high_freq) == 0: return { 'low_freq_ratio': 0.0, 'mid_freq_ratio': 0.0, 'high_freq_ratio': 0.0, 'spectral_balance': 0.0, - 'energy_entropy': 0.0 + 'energy_entropy': 0.0, } - + # Calculate total energy total_energy = low_freq + mid_freq + high_freq - + # Calculate ratios low_freq_ratio = np.mean(low_freq / (total_energy + 1e-8)) mid_freq_ratio = np.mean(mid_freq / (total_energy + 1e-8)) high_freq_ratio = np.mean(high_freq / (total_energy + 1e-8)) - + # Calculate spectral balance (how evenly distributed energy is) energy_ratios = np.array([low_freq_ratio, mid_freq_ratio, high_freq_ratio]) energy_ratios = energy_ratios / (np.sum(energy_ratios) + 1e-8) - + # Entropy of energy distribution energy_entropy = entropy(energy_ratios + 1e-8) max_entropy = np.log(3) # Maximum entropy for 3 bands normalized_entropy = energy_entropy / max_entropy if max_entropy > 0 else 0 - + # Spectral balance (higher entropy = more balanced) spectral_balance = normalized_entropy - + return { 'low_freq_ratio': float(low_freq_ratio), 'low_energy_ratio': float(low_freq_ratio), # Field name expected by tests @@ -488,177 +507,181 @@ def analyze_energy_distribution(self, energy_features: Dict[str, np.ndarray]) -> 'high_energy_ratio': float(high_freq_ratio), # Field name expected by tests 'spectral_balance': float(spectral_balance), 'energy_entropy': float(normalized_entropy), - 'energy_concentration': float(1.0 - normalized_entropy), # Field name expected by tests - 'energy_spread': float(normalized_entropy) # Field name expected by tests + 'energy_concentration': float( + 1.0 - normalized_entropy + ), # Field name expected by tests + 'energy_spread': float(normalized_entropy), # Field name expected by tests } - - def detect_dynamic_events(self, energy_features: Dict[str, np.ndarray], - threshold: float = 0.2) -> Dict[str, List[Dict[str, Any]]]: + + def detect_dynamic_events( + self, energy_features: Dict[str, np.ndarray], threshold: float = 0.2 + ) -> Dict[str, List[Dict[str, Any]]]: """Detect significant dynamic events (drops, builds, etc.). 
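+
+        Builds and drops are frames where the derivative of the smoothed RMS
+        exceeds plus/minus two standard deviations of that derivative;
+        sustained peaks and quiet sections are runs of at least ten frames
+        above mean + std or below mean - std, respectively.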
- + Args: energy_features: Dictionary of energy features threshold: Threshold for event detection - + Returns: Dictionary containing lists of different event types """ rms = energy_features.get('rms', np.array([])) - + if len(rms) < 20: # Need sufficient data return { 'sudden_increases': [], 'sudden_decreases': [], 'sustained_peaks': [], - 'quiet_sections': [] + 'quiet_sections': [], } - + # Smooth the signal - smoothed_rms = savgol_filter(rms, min(11, len(rms)//2*2+1), 3) - + smoothed_rms = savgol_filter(rms, min(11, len(rms) // 2 * 2 + 1), 3) + # Calculate derivative to find rapid changes rms_diff = np.diff(smoothed_rms) - + sr = 22050 # Default sample rate sudden_increases = [] sudden_decreases = [] sustained_peaks = [] quiet_sections = [] - + # Detect sudden increases (builds) build_threshold = np.std(rms_diff) * 2 build_indices = np.where(rms_diff > build_threshold)[0] - + for idx in build_indices: time_seconds = idx * self.hop_length / sr magnitude = rms_diff[idx] / (np.std(rms_diff) + 1e-8) - - sudden_increases.append({ - 'time': float(time_seconds), - 'magnitude': float(magnitude) - }) - + + sudden_increases.append( + {'time': float(time_seconds), 'magnitude': float(magnitude)} + ) + # Detect sudden decreases (drops) drop_threshold = -np.std(rms_diff) * 2 drop_indices = np.where(rms_diff < drop_threshold)[0] - + for idx in drop_indices: time_seconds = idx * self.hop_length / sr magnitude = abs(rms_diff[idx]) / (np.std(rms_diff) + 1e-8) - - sudden_decreases.append({ - 'time': float(time_seconds), - 'magnitude': float(magnitude) - }) - + + sudden_decreases.append( + {'time': float(time_seconds), 'magnitude': float(magnitude)} + ) + # Detect sustained peaks peak_threshold = np.mean(smoothed_rms) + np.std(smoothed_rms) peak_indices = np.where(smoothed_rms > peak_threshold)[0] - + if len(peak_indices) > 0: # Group consecutive peaks peak_groups = [] current_group = [peak_indices[0]] - + for i in range(1, len(peak_indices)): - if peak_indices[i] - peak_indices[i-1] <= 5: # Within 5 frames + if peak_indices[i] - peak_indices[i - 1] <= 5: # Within 5 frames current_group.append(peak_indices[i]) else: if len(current_group) >= 10: # Sustained for at least 10 frames peak_groups.append(current_group) current_group = [peak_indices[i]] - + if len(current_group) >= 10: peak_groups.append(current_group) - + for group in peak_groups: start_time = group[0] * self.hop_length / sr - end_time = group[-1] * self.hop_length / sr avg_magnitude = np.mean(smoothed_rms[group]) - - sustained_peaks.append({ - 'time': float(start_time), - 'magnitude': float(avg_magnitude) - }) - + + sustained_peaks.append( + {'time': float(start_time), 'magnitude': float(avg_magnitude)} + ) + # Detect quiet sections quiet_threshold = np.mean(smoothed_rms) - np.std(smoothed_rms) quiet_indices = np.where(smoothed_rms < quiet_threshold)[0] - + if len(quiet_indices) > 0: # Group consecutive quiet periods quiet_groups = [] current_group = [quiet_indices[0]] - + for i in range(1, len(quiet_indices)): - if quiet_indices[i] - quiet_indices[i-1] <= 5: + if quiet_indices[i] - quiet_indices[i - 1] <= 5: current_group.append(quiet_indices[i]) else: if len(current_group) >= 10: quiet_groups.append(current_group) current_group = [quiet_indices[i]] - + if len(current_group) >= 10: quiet_groups.append(current_group) - + for group in quiet_groups: start_time = group[0] * self.hop_length / sr avg_magnitude = np.mean(smoothed_rms[group]) - - quiet_sections.append({ - 'time': float(start_time), - 'magnitude': float(avg_magnitude) - }) - + + 
quiet_sections.append( + {'time': float(start_time), 'magnitude': float(avg_magnitude)} + ) + return { 'sudden_increases': sudden_increases, 'sudden_decreases': sudden_decreases, 'sustained_peaks': sustained_peaks, - 'quiet_sections': quiet_sections + 'quiet_sections': quiet_sections, } - + def analyze(self, y: np.ndarray, sr: int) -> Dict[str, Any]: """Perform complete dynamics analysis. - + Args: y: Audio signal sr: Sample rate - + Returns: Complete dynamics analysis results """ # Extract energy features energy_features = self.extract_energy_features(y, sr) - + # Calculate dynamic range dynamic_range = self.calculate_dynamic_range(energy_features['rms']) - + # Analyze loudness loudness = self.analyze_loudness(y, sr) - + # Generate energy profile energy_profile = self.generate_energy_profile(energy_features) - + # Detect climax points climax_points_data = self.detect_climax_points(energy_features) - + # Analyze tension curve tension_data = self.analyze_tension_curve(energy_features) - + # Analyze energy distribution energy_distribution = self.analyze_energy_distribution(energy_features) - + # Detect dynamic events dynamic_events_data = self.detect_dynamic_events(energy_features) - + # Convert climax points to list format expected by tests climax_points_list = [] for i, time in enumerate(climax_points_data['climax_times']): - climax_points_list.append({ - 'time': float(time), - 'intensity': float(climax_points_data['climax_energies'][i]) if i < len(climax_points_data['climax_energies']) else 0.0 - }) - + climax_points_list.append( + { + 'time': float(time), + 'intensity': ( + float(climax_points_data['climax_energies'][i]) + if i < len(climax_points_data['climax_energies']) + else 0.0 + ), + } + ) + # Convert dynamic events to list format expected by tests dynamic_events_list = [] for event_type, events in dynamic_events_data.items(): @@ -666,15 +689,31 @@ def analyze(self, y: np.ndarray, sr: int) -> Dict[str, Any]: event_copy = event.copy() event_copy['type'] = event_type.rstrip('s') # Remove plural 's' dynamic_events_list.append(event_copy) - + return { 'dynamic_range': dynamic_range, 'loudness': loudness, - 'energy_profile': energy_profile['energy_curve'].tolist() if len(energy_profile['energy_curve']) > 0 else [], + 'energy_profile': ( + energy_profile['energy_curve'].tolist() + if len(energy_profile['energy_curve']) > 0 + else [] + ), 'climax_points': climax_points_list, - 'tension_curve': tension_data['tension_curve'].tolist() if len(tension_data['tension_curve']) > 0 else [], + 'tension_curve': ( + tension_data['tension_curve'].tolist() + if len(tension_data['tension_curve']) > 0 + else [] + ), 'energy_distribution': energy_distribution, 'dynamic_events': dynamic_events_list, - 'overall_energy': float(np.mean(energy_features['rms'])) if len(energy_features['rms']) > 0 else 0.0, - 'energy_variance': float(np.var(energy_features['rms'])) if len(energy_features['rms']) > 0 else 0.0 - } \ No newline at end of file + 'overall_energy': ( + float(np.mean(energy_features['rms'])) + if len(energy_features['rms']) > 0 + else 0.0 + ), + 'energy_variance': ( + float(np.var(energy_features['rms'])) + if len(energy_features['rms']) > 0 + else 0.0 + ), + } diff --git a/src/bpm_detector/effects_detector.py b/src/bpm_detector/effects_detector.py index 0a62006..520d88f 100644 --- a/src/bpm_detector/effects_detector.py +++ b/src/bpm_detector/effects_detector.py @@ -7,151 +7,152 @@ class EffectsDetector: """Detects audio effects usage in signals.""" - + def __init__(self, hop_length: int = 512): 
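        # NOTE: the detectors in this class are coarse spectral heuristics;
        # each returns a relative score in [0, 1], not a calibrated
        # measurement of the effect's strength.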
"""Initialize effects detector. - + Args: hop_length: Hop length for analysis """ self.hop_length = hop_length - + def analyze_effects_usage(self, y: np.ndarray, sr: int) -> Dict[str, float]: """Analyze usage of audio effects. - + Args: y: Audio signal sr: Sample rate - + Returns: Dictionary of effect usage scores """ effects = {} - + # Detect individual effects effects['reverb'] = self._detect_reverb(y, sr) effects['distortion'] = self._detect_distortion(y, sr) effects['chorus'] = self._detect_chorus(y, sr) effects['compression'] = self._detect_compression(y) - + return effects - + def _detect_reverb(self, y: np.ndarray, sr: int) -> float: """Detect reverb presence. - + Args: y: Audio signal sr: Sample rate - + Returns: Reverb amount (0-1) """ # Calculate envelope decay characteristics envelope = np.abs(librosa.stft(y)) envelope_mean = np.mean(envelope, axis=0) - + # Look for exponential decay patterns if len(envelope_mean) < 10: return 0.2 # Default low reverb amount - + # Calculate autocorrelation to find decay patterns autocorr = np.correlate(envelope_mean, envelope_mean, mode='full') - autocorr = autocorr[len(autocorr)//2:] - + autocorr = autocorr[len(autocorr) // 2 :] + # Look for long-term correlations (indicating reverb tail) - long_term_corr = np.mean(autocorr[len(autocorr)//4:len(autocorr)//2]) - short_term_corr = np.mean(autocorr[:len(autocorr)//8]) - + long_term_corr = np.mean(autocorr[len(autocorr) // 4 : len(autocorr) // 2]) + short_term_corr = np.mean(autocorr[: len(autocorr) // 8]) + if short_term_corr == 0: return 0.2 # Default low reverb amount - + reverb_ratio = long_term_corr / short_term_corr - + # Check for additional reverb indicators # Calculate energy decay rate energy_decay = np.diff(envelope_mean) decay_variance = np.var(energy_decay) - + # More variance in decay suggests reverb decay_factor = min(0.3, decay_variance * 10.0) - + # Combine ratio and decay analysis base_reverb = min(0.7, max(0.0, reverb_ratio * 1.5)) total_reverb = base_reverb + decay_factor - + return min(0.95, total_reverb) - + def _detect_distortion(self, y: np.ndarray, sr: int) -> float: """Detect distortion presence. - + Args: y: Audio signal sr: Sample rate - + Returns: Distortion amount (0-1) """ # Calculate harmonic content stft = librosa.stft(y) magnitude = np.abs(stft) - + # Look for harmonic distortion (odd harmonics) - freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0]*2-1) - + # Calculate total harmonic distortion approximation - fundamental_energy = np.mean(magnitude[:len(magnitude)//4, :]) - harmonic_energy = np.mean(magnitude[len(magnitude)//4:, :]) - + fundamental_energy = np.mean(magnitude[: len(magnitude) // 4, :]) + harmonic_energy = np.mean(magnitude[len(magnitude) // 4 :, :]) + if fundamental_energy == 0: return 0.0 - + distortion_ratio = harmonic_energy / fundamental_energy - + return min(1.0, distortion_ratio) - + def _detect_chorus(self, y: np.ndarray, sr: int) -> float: """Detect chorus/modulation effects. 
- + Args: y: Audio signal sr: Sample rate - + Returns: Chorus amount (0-1) """ # Calculate spectral centroid variation spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr) - + if spectral_centroid.size == 0: return 0.0 - + # Look for periodic variations in spectral centroid - centroid_variation = np.std(spectral_centroid) / (np.mean(spectral_centroid) + 1e-8) - + centroid_variation = np.std(spectral_centroid) / ( + np.mean(spectral_centroid) + 1e-8 + ) + # Normalize to 0-1 scale chorus_amount = min(1.0, centroid_variation * 5.0) - + return chorus_amount - + def _detect_compression(self, y: np.ndarray) -> float: """Detect compression presence. - + Args: y: Audio signal - + Returns: Compression amount (0-1) """ # Calculate dynamic range rms = librosa.feature.rms(y=y) - + if rms.size == 0: return 0.0 - + # Calculate coefficient of variation rms_cv = np.std(rms) / (np.mean(rms) + 1e-8) - + # Lower variation indicates more compression compression_amount = 1.0 - min(1.0, rms_cv * 2.0) - - return float(max(0.0, compression_amount)) \ No newline at end of file + + return float(max(0.0, compression_amount)) diff --git a/src/bpm_detector/instrument_classifier.py b/src/bpm_detector/instrument_classifier.py index 1be1cb0..ca37b1b 100644 --- a/src/bpm_detector/instrument_classifier.py +++ b/src/bpm_detector/instrument_classifier.py @@ -7,7 +7,7 @@ class InstrumentClassifier: """Classifies instruments present in audio signals.""" - + # Instrument frequency ranges (Hz) - expanded and refined INSTRUMENT_RANGES = { 'vocals': (80, 1100), @@ -28,46 +28,51 @@ class InstrumentClassifier: 'flute': (262, 2093), 'violin': (196, 3136), 'cello': (65, 1047), - 'saxophone': (138, 880) + 'saxophone': (138, 880), } - + def __init__(self, hop_length: int = 512, n_fft: int = 2048): """Initialize instrument classifier. - + Args: hop_length: Hop length for analysis n_fft: FFT size """ self.hop_length = hop_length self.n_fft = n_fft - + def classify_instruments(self, y: np.ndarray, sr: int) -> List[Dict[str, Any]]: """Classify instruments present in the audio. 
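
        A minimal usage sketch (assuming default construction):

            clf = InstrumentClassifier(hop_length=512, n_fft=2048)
            for hit in clf.classify_instruments(y, sr):
                print(hit['instrument'], hit['confidence'], hit['prominence'])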
- + Args: y: Audio signal sr: Sample rate - + Returns: List of detected instruments with confidence scores """ instruments = [] - + # Separate harmonic and percussive components harmonic, percussive = librosa.effects.hpss(y) - + # Compute STFT for frequency analysis stft = librosa.stft(y, hop_length=self.hop_length, n_fft=self.n_fft) magnitude = np.abs(stft) freqs = librosa.fft_frequencies(sr=sr, n_fft=self.n_fft) - + for instrument, freq_range in self.INSTRUMENT_RANGES.items(): low_freq, high_freq = freq_range confidence = self._calculate_instrument_confidence( - magnitude, freqs, low_freq=low_freq, high_freq=high_freq, - instrument=instrument, harmonic=harmonic, percussive=percussive + magnitude, + freqs, + low_freq=low_freq, + high_freq=high_freq, + instrument=instrument, + harmonic=harmonic, + percussive=percussive, ) - + # Use different thresholds for different instrument types if instrument in ['vocals', 'piano', 'guitar', 'bass']: threshold = 0.15 @@ -75,32 +80,42 @@ def classify_instruments(self, y: np.ndarray, sr: int) -> List[Dict[str, Any]]: threshold = 0.12 else: threshold = 0.18 - + if confidence > threshold: prominence = self._calculate_instrument_prominence( magnitude, freqs, low_freq=low_freq, high_freq=high_freq ) - - instruments.append({ - 'instrument': instrument, - 'confidence': confidence, - 'prominence': prominence - }) - + + instruments.append( + { + 'instrument': instrument, + 'confidence': confidence, + 'prominence': prominence, + } + ) + # Filter redundant instruments instruments = self._filter_redundant_instruments(instruments) - + # Sort by confidence instruments.sort(key=lambda x: x['confidence'], reverse=True) - + return instruments - - def _calculate_instrument_confidence(self, magnitude: np.ndarray, freqs: np.ndarray, - freq_range: tuple = None, low_freq: float = None, high_freq: float = None, - instrument: str = None, harmonic: np.ndarray = None, - percussive: np.ndarray = None, spectral_shape=None) -> float: + + def _calculate_instrument_confidence( + self, + magnitude: np.ndarray, + freqs: np.ndarray, + freq_range: tuple = None, + low_freq: float = None, + high_freq: float = None, + instrument: str = None, + harmonic: np.ndarray = None, + percussive: np.ndarray = None, + spectral_shape=None, + ) -> float: """Calculate confidence for instrument presence. 
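
        Accepts either a freq_range tuple or explicit low_freq/high_freq
        bounds; if neither form is supplied, the method returns 0.0.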
- + Args: magnitude: STFT magnitude freqs: Frequency bins @@ -111,7 +126,7 @@ def _calculate_instrument_confidence(self, magnitude: np.ndarray, freqs: np.ndar harmonic: Harmonic component percussive: Percussive component spectral_shape: Spectral shape hint - + Returns: Confidence score (0-1) """ @@ -120,13 +135,13 @@ def _calculate_instrument_confidence(self, magnitude: np.ndarray, freqs: np.ndar low_freq, high_freq = freq_range elif low_freq is None or high_freq is None: return 0.0 - + # Find frequency range freq_mask = (freqs >= low_freq) & (freqs <= high_freq) - + if not np.any(freq_mask): return 0.0 - + # Calculate energy in frequency range if magnitude.ndim == 2: range_energy = np.mean(magnitude[freq_mask, :]) @@ -134,26 +149,28 @@ def _calculate_instrument_confidence(self, magnitude: np.ndarray, freqs: np.ndar else: range_energy = np.mean(magnitude[freq_mask]) total_energy = np.mean(magnitude) - + if total_energy == 0: return 0.0 - + energy_ratio = range_energy / total_energy - + # Apply instrument-specific heuristics if components are available if harmonic is not None and percussive is not None and instrument is not None: if instrument in ['kick_drum', 'snare_drum', 'hi_hat', 'drums', 'cymbals']: # Percussive instruments - check percussive component try: perc_energy = np.mean(np.abs(librosa.stft(percussive))) - total_perc_energy = np.mean(np.abs(librosa.stft(percussive + harmonic))) - + total_perc_energy = np.mean( + np.abs(librosa.stft(percussive + harmonic)) + ) + if total_perc_energy > 0: perc_ratio = perc_energy / total_perc_energy confidence = (energy_ratio * 1.5 + perc_ratio) / 2.0 else: confidence = energy_ratio * 1.5 - except: + except Exception: confidence = energy_ratio * 1.5 elif instrument == 'bass': # Bass instruments - boost low frequency detection @@ -165,30 +182,38 @@ def _calculate_instrument_confidence(self, magnitude: np.ndarray, freqs: np.ndar # Harmonic instruments - check harmonic component try: harm_energy = np.mean(np.abs(librosa.stft(harmonic))) - total_harm_energy = np.mean(np.abs(librosa.stft(harmonic + percussive))) - + total_harm_energy = np.mean( + np.abs(librosa.stft(harmonic + percussive)) + ) + if total_harm_energy > 0: harm_ratio = harm_energy / total_harm_energy confidence = (energy_ratio + harm_ratio) / 2.0 else: confidence = energy_ratio - except: + except Exception: confidence = energy_ratio else: confidence = energy_ratio - + return min(1.0, confidence * 2.5) - - def _calculate_instrument_prominence(self, magnitude: np.ndarray, freqs: np.ndarray, - freq_range: tuple = None, low_freq: float = None, high_freq: float = None) -> float: + + def _calculate_instrument_prominence( + self, + magnitude: np.ndarray, + freqs: np.ndarray, + freq_range: tuple = None, + low_freq: float = None, + high_freq: float = None, + ) -> float: """Calculate instrument prominence in the mix. 
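
        Prominence is the mean band energy over the mean full-spectrum energy,
        amplified by 3x and clipped to 1.0, so scores near 1.0 indicate a band
        that dominates the mix.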
- + Args: magnitude: STFT magnitude freqs: Frequency bins low_freq: Low frequency bound high_freq: High frequency bound - + Returns: Prominence score (0-1) """ @@ -197,32 +222,34 @@ def _calculate_instrument_prominence(self, magnitude: np.ndarray, freqs: np.ndar low_freq, high_freq = freq_range elif low_freq is None or high_freq is None: return 0.0 - + freq_mask = (freqs >= low_freq) & (freqs <= high_freq) - + if not np.any(freq_mask): return 0.0 - + if magnitude.ndim == 2: range_energy = np.mean(magnitude[freq_mask, :]) total_energy = np.mean(magnitude) else: range_energy = np.mean(magnitude[freq_mask]) total_energy = np.mean(magnitude) - + if total_energy == 0: return 0.0 - + prominence = range_energy / total_energy - + return min(1.0, prominence * 3.0) # Amplify for better range - - def _filter_redundant_instruments(self, instruments: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + + def _filter_redundant_instruments( + self, instruments: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Filter out redundant instrument detections. - + Args: instruments: List of detected instruments - + Returns: Filtered list of instruments """ @@ -231,23 +258,23 @@ def _filter_redundant_instruments(self, instruments: List[Dict[str, Any]]) -> Li ['guitar', 'electric_guitar'], ['violin', 'strings'], ['cello', 'strings'], - ['piano'] # Piano should be unique + ['piano'], # Piano should be unique ] - + # Allow multiple drum instruments to coexist # Don't group drums together - let kick_drum, snare_drum, hi_hat all appear - + filtered = [] used_groups = set() seen_instruments = set() - + for instrument in instruments: inst_name = instrument['instrument'] - + # Skip if we've already seen this exact instrument if inst_name in seen_instruments: continue - + # Check if this instrument belongs to a group group_found = False for i, group in enumerate(instrument_groups): @@ -259,10 +286,10 @@ def _filter_redundant_instruments(self, instruments: List[Dict[str, Any]]) -> Li seen_instruments.add(inst_name) group_found = True break - + if not group_found: # Not in any group - keep it filtered.append(instrument) seen_instruments.add(inst_name) - - return filtered \ No newline at end of file + + return filtered diff --git a/src/bpm_detector/key_validation.py b/src/bpm_detector/key_validation.py index 75b65b7..72be3e3 100644 --- a/src/bpm_detector/key_validation.py +++ b/src/bpm_detector/key_validation.py @@ -8,18 +8,18 @@ class KeyValidator: """Validates and refines key detection results.""" - + @staticmethod def validate_relative_keys( - key_note: str, - mode: str, + key_note: str, + mode: str, confidence: float, - chroma_mean: np.ndarray, + chroma_mean: np.ndarray, correlations: List[float] = None, - key_names: List[str] = None + key_names: List[str] = None, ) -> Tuple[str, str, float]: """Validate and potentially correct key detection using relative major/minor analysis. 
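
        Relative keys share a key signature and lie a minor third apart: for
        example, C Major (index 0) has relative minor A ((0 - 3) % 12 == 9),
        and A Minor has relative major C ((9 + 3) % 12 == 0).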
- + Args: key_note: Detected key note mode: Detected mode @@ -27,7 +27,7 @@ def validate_relative_keys( chroma_mean: Average chroma vector correlations: All key correlations key_names: All key names - + Returns: (final_key, final_mode, final_confidence) """ @@ -35,11 +35,11 @@ def validate_relative_keys( key_index = NOTE_NAMES.index(key_note) except ValueError: return key_note, mode, confidence - + # If correlations and key_names are not provided, just return original values if correlations is None or key_names is None: return key_note, mode, confidence - + # Find relative major/minor if mode == 'Major': # Relative minor is 3 semitones down (minor third) @@ -51,7 +51,7 @@ def validate_relative_keys( relative_major_index = (key_index + 3) % 12 relative_major_name = NOTE_NAMES[relative_major_index] relative_key_name = f"{relative_major_name} Major" - + # Find correlation for relative key try: relative_key_idx = key_names.index(relative_key_name) @@ -59,140 +59,150 @@ def validate_relative_keys( current_correlation = correlations[key_names.index(f"{key_note} {mode}")] except (ValueError, IndexError): return key_note, mode, confidence - + # If relative key has significantly higher correlation, consider switching correlation_diff = relative_correlation - current_correlation - + # Relaxed threshold for relative major/minor switching - if abs(correlation_diff) < _Constants.REL_SWITCH_THRESH: # Close correlations, need deeper analysis + if ( + abs(correlation_diff) < _Constants.REL_SWITCH_THRESH + ): # Close correlations, need deeper analysis # Analyze chord progression tendencies - major_tendency = KeyValidator._analyze_major_tendency(chroma_mean, key_index) - minor_tendency = KeyValidator._analyze_minor_tendency(chroma_mean, key_index) - + major_tendency = KeyValidator._analyze_major_tendency( + chroma_mean, key_index + ) + minor_tendency = KeyValidator._analyze_minor_tendency( + chroma_mean, key_index + ) + if mode == 'Major' and minor_tendency > major_tendency + 0.2: # Switch to relative minor return relative_minor_name, 'Minor', confidence * 0.9 elif mode == 'Minor' and major_tendency > minor_tendency + 0.2: # Switch to relative major return relative_major_name, 'Major', confidence * 1.1 - + elif correlation_diff > _Constants.REL_SWITCH_THRESH: # Relative key stronger if mode == 'Major': return relative_minor_name, 'Minor', confidence * 1.1 else: return relative_major_name, 'Major', confidence * 1.1 - + return key_note, mode, confidence - + @staticmethod def _analyze_major_tendency(chroma_mean: np.ndarray, key_index: int) -> float: """Analyze tendency towards major tonality.""" # Major chord tones: I, III, V (root, major third, fifth) major_third = (key_index + 4) % 12 fifth = (key_index + 7) % 12 - - major_strength = (chroma_mean[key_index] + - chroma_mean[major_third] * 1.2 + # Major third is characteristic - chroma_mean[fifth]) / 3.2 - + + major_strength = ( + chroma_mean[key_index] + + chroma_mean[major_third] * 1.2 # Major third is characteristic + + chroma_mean[fifth] + ) / 3.2 + return major_strength - + @staticmethod def _analyze_minor_tendency(chroma_mean: np.ndarray, key_index: int) -> float: """Analyze tendency towards minor tonality.""" # Minor chord tones: i, ♭III, V (root, minor third, fifth) minor_third = (key_index + 3) % 12 fifth = (key_index + 7) % 12 - - minor_strength = (chroma_mean[key_index] + - chroma_mean[minor_third] * 1.2 + # Minor third is characteristic - chroma_mean[fifth]) / 3.2 - + + minor_strength = ( + chroma_mean[key_index] + + chroma_mean[minor_third] * 1.2 
# Minor third is characteristic + + chroma_mean[fifth] + ) / 3.2 + return minor_strength class JPOPKeyDetector: """Specialized detector for J-Pop keys.""" - + @staticmethod def detect_jpop_keys( - chroma_mean: np.ndarray, + chroma_mean: np.ndarray, correlations: List[float], - enable_jpop: bool = True, - key_names: List[str] = None + enable_jpop: bool = True, + key_names: List[str] = None, ) -> Tuple[str, str, float]: """Special detection for common J-Pop keys like G# minor.""" - + # Common J-Pop keys to check specifically jpop_keys = [ ('G#', 'Minor'), # Very common in J-Pop ('D#', 'Minor'), # Also very common ('F#', 'Minor'), # Common ('C#', 'Minor'), # Common - ('B', 'Major'), # Relative major of G# minor - ('F#', 'Major') # Relative major of D# minor + ('B', 'Major'), # Relative major of G# minor + ('F#', 'Major'), # Relative major of D# minor ] - + best_key = 'None' best_mode = 'Unknown' best_strength = 0.0 - + for key_note, mode in jpop_keys: try: key_index = NOTE_NAMES.index(key_note) - + # Calculate specific strength for this key if mode == 'Minor': # Check for characteristic minor chord patterns tonic = chroma_mean[key_index] minor_third = chroma_mean[(key_index + 3) % 12] fifth = chroma_mean[(key_index + 7) % 12] - relative_major = chroma_mean[(key_index + 3) % 12] # III - # G# minor specific pattern: G#m - F# - E - C#m if key_note == 'G#': f_sharp = chroma_mean[6] # F# - e = chroma_mean[4] # E + e = chroma_mean[4] # E c_sharp = chroma_mean[1] # C# - + # Check for G#m - F# - E - C#m pattern - pattern_strength = (tonic * 1.5 + f_sharp * 1.2 + e * 1.1 + c_sharp * 1.0) / 4.8 - + pattern_strength = ( + tonic * 1.5 + f_sharp * 1.2 + e * 1.1 + c_sharp * 1.0 + ) / 4.8 + # Boost if this pattern is strong (lowered threshold) if pattern_strength > _Constants.PATTERN_THRESH: strength = pattern_strength * 1.5 # Increased boost else: strength = (tonic + minor_third * 1.2 + fifth) / 3.2 - + # Additional boost for G# minor specifically strength *= 1.2 else: # Standard minor key strength strength = (tonic + minor_third * 1.2 + fifth) / 3.2 - + else: # Major tonic = chroma_mean[key_index] major_third = chroma_mean[(key_index + 4) % 12] fifth = chroma_mean[(key_index + 7) % 12] strength = (tonic + major_third * 1.1 + fifth) / 3.1 - + # Check against existing correlation key_name = f"{key_note} {mode}" if key_names is not None and key_name in key_names: correlation_idx = key_names.index(key_name) correlation_strength = correlations[correlation_idx] - + # Combine pattern strength with correlation combined_strength = (strength + correlation_strength) / 2.0 else: combined_strength = strength - + if combined_strength > best_strength: best_strength = combined_strength best_key = key_note best_mode = mode - + except (ValueError, IndexError): continue - - return best_key, best_mode, best_strength \ No newline at end of file + + return best_key, best_mode, best_strength diff --git a/src/bpm_detector/parallel_analyzer.py b/src/bpm_detector/parallel_analyzer.py index 3d0817e..4e4810f 100644 --- a/src/bpm_detector/parallel_analyzer.py +++ b/src/bpm_detector/parallel_analyzer.py @@ -1,94 +1,113 @@ """Parallel audio analyzer with progress tracking and auto-optimization.""" -import time -import signal -import concurrent.futures import numpy as np from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor from typing import Dict, Any, List, Union, Optional, Callable from .music_analyzer import AudioAnalyzer -from .auto_parallel import AutoParallelConfig, SystemMonitor, PerformanceProfiler, 
ParallelConfig -from .progress_manager import ProgressManager, ProgressCallback, TaskStatus +from .auto_parallel import ( + AutoParallelConfig, + SystemMonitor, + PerformanceProfiler, + ParallelConfig, +) +from .progress_manager import ProgressManager, ProgressCallback class SmartParallelAudioAnalyzer(AudioAnalyzer): """Smart parallel audio analyzer with automatic optimization.""" - - def __init__(self, auto_parallel: bool = True, max_workers: Optional[int] = None, **kwargs): + + def __init__( + self, auto_parallel: bool = True, max_workers: Optional[int] = None, **kwargs + ): super().__init__(**kwargs) self.auto_parallel = auto_parallel self.system_monitor = SystemMonitor() self.performance_profiler = PerformanceProfiler() self._parallel_config: Optional[ParallelConfig] = None self._manual_max_workers = max_workers - + if auto_parallel: self._configure_auto_parallel() - + def _configure_auto_parallel(self): """Configure automatic parallelization.""" self._parallel_config = AutoParallelConfig.get_optimal_config() - + # Override with manual setting if provided if self._manual_max_workers: self._parallel_config.max_workers = self._manual_max_workers - self._parallel_config.reason += f" (manual override: {self._manual_max_workers} workers)" - + self._parallel_config.reason += ( + f" (manual override: {self._manual_max_workers} workers)" + ) + if self._parallel_config.enable_parallel: # Calculate physical memory info import psutil + total_memory_gb = psutil.virtual_memory().total / (1024**3) memory_limit_gb = self._parallel_config.memory_limit_mb / 1024 memory_percentage = (memory_limit_gb / total_memory_gb) * 100 - + print(f"🚀 Auto-parallel enabled: {self._parallel_config.reason}") print(f" Workers: {self._parallel_config.max_workers}") - print(f" Memory limit: {self._parallel_config.memory_limit_mb}MB ({memory_limit_gb:.1f}GB, {memory_percentage:.0f}% of {total_memory_gb:.1f}GB physical)") + print( + f" Memory limit: {self._parallel_config.memory_limit_mb}MB ({memory_limit_gb:.1f}GB, {memory_percentage:.0f}% of {total_memory_gb:.1f}GB physical)" + ) print(f" Strategy: {self._parallel_config.strategy.value}") - + # Start load monitoring self.system_monitor.start_monitoring() else: print(f"⚡ Sequential processing: {self._parallel_config.reason}") - + def analyze_file( self, path: Union[str, List[str]], comprehensive: bool = True, progress_callback: Optional[Callable] = None, - progress_display = None, + progress_display=None, detailed_progress: bool = False, - **kwargs + **kwargs, ) -> Union[Dict[str, Any], Dict[str, Dict[str, Any]]]: """Analyze audio file(s) with smart parallelization.""" - + if isinstance(path, (list, tuple)): return self._analyze_multiple_files( - path, comprehensive, progress_callback, progress_display, detailed_progress, **kwargs + path, + comprehensive, + progress_callback, + progress_display, + detailed_progress, + **kwargs, ) else: return self._analyze_single_file( - path, comprehensive, progress_callback, progress_display, detailed_progress, **kwargs + path, + comprehensive, + progress_callback, + progress_display, + detailed_progress, + **kwargs, ) - + def _analyze_single_file( self, path: str, comprehensive: bool = True, progress_callback: Optional[Callable] = None, - progress_display = None, + progress_display=None, detailed_progress: bool = False, - **kwargs + **kwargs, ) -> Dict[str, Any]: """Analyze single file with parallel processing.""" - + if not comprehensive or not self._should_use_parallel(): return super().analyze_file(path, **kwargs) - + # Setup 
progress management progress_manager = ProgressManager() - + # Setup progress display or callback with safety measures if progress_display: # Use progress display for detailed multi-bar progress with safety @@ -97,6 +116,7 @@ def safe_display_callback(pm): progress_display.update(pm) except Exception as e: print(f"Progress display error: {e}") + progress_manager.add_callback(safe_display_callback) elif progress_callback: # Use simple callback for basic progress @@ -108,67 +128,73 @@ def safe_progress_callback(pm): message = f"Running: {', '.join(running_tasks[:2])}" else: with pm._lock: - completed = sum(1 for t in pm._tasks.values() if t.status.value == "completed") + completed = sum( + 1 + for t in pm._tasks.values() + if t.status.value == "completed" + ) total = len(pm._tasks) message = f"Completed: {completed}/{total} tasks" progress_callback(overall_progress, message) - except Exception as e: + except Exception: # Fallback to simple message progress_callback(pm.get_overall_progress(), "Processing...") - + progress_manager.add_callback(safe_progress_callback) - + try: # Load audio first if progress_callback: progress_callback(5, "Loading audio file...") - + import librosa + y, sr = librosa.load( path, sr=self.sr, mono=True, dtype=np.float32, # Use float32 for better memory efficiency - res_type='kaiser_fast' # Faster resampling + res_type='kaiser_fast', # Faster resampling ) sr = int(sr) # Ensure sr is int for type checking - + if progress_callback: progress_callback(10, "Starting parallel analysis...") - + # Parallel comprehensive analysis results = self._parallel_comprehensive_analysis( y, sr, path, progress_manager, **kwargs ) - + # Clear audio data from memory early del y import gc + gc.collect() - + if progress_callback: progress_callback(100, "Analysis completed!") - + return results - + except Exception as e: if progress_callback: progress_callback(0, f"Error: {e}") raise finally: progress_manager.reset() - + def _analyze_multiple_files( self, paths: List[str], comprehensive: bool = True, progress_callback: Optional[Callable] = None, - progress_display = None, + progress_display=None, detailed_progress: bool = False, - **kwargs + **kwargs, ) -> Dict[str, Dict[str, Any]]: """Analyze multiple files with parallel processing.""" - + # Adjust configuration for multiple files if self._parallel_config: adjusted_config = AutoParallelConfig.get_file_count_adjustment( @@ -176,37 +202,32 @@ def _analyze_multiple_files( ) else: adjusted_config = AutoParallelConfig.get_optimal_config() - + if not adjusted_config.enable_parallel: # Sequential processing results = {} for i, path in enumerate(paths): if progress_callback: progress_callback( - int(100 * i / len(paths)), - f"Processing {path} ({i+1}/{len(paths)})" + int(100 * i / len(paths)), + f"Processing {path} ({i+1}/{len(paths)})", ) results[path] = self.analyze_file(path, **kwargs) - + if progress_callback: progress_callback(100, "All files completed!") return results - + # Parallel processing return self._parallel_analyze_multiple_files( paths, comprehensive, adjusted_config, progress_callback, **kwargs ) - + def _parallel_comprehensive_analysis( - self, - y, - sr, - path: str, - progress_manager: ProgressManager, - **kwargs + self, y, sr, path: str, progress_manager: ProgressManager, **kwargs ) -> Dict[str, Any]: """Perform parallel comprehensive analysis.""" - + # Register analysis tasks analysis_tasks = [ ('basic_info', 'Basic Analysis'), @@ -215,55 +236,86 @@ def _parallel_comprehensive_analysis( ('rhythm', 'Rhythm Analysis'), 
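            # Each task id registered here corresponds to a future submitted
            # to the ThreadPoolExecutor below; 'basic_info' runs first because
            # the chord and structure analyzers reuse its BPM/key estimates.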
('timbre', 'Timbre Analysis'), ('melody_harmony', 'Melody & Harmony'), - ('dynamics', 'Dynamics Analysis') + ('dynamics', 'Dynamics Analysis'), ] - + for task_id, name in analysis_tasks: progress_manager.register_task(task_id, name) - + # Basic analysis first (needed by other analyzers) basic_callback = ProgressCallback(progress_manager, 'basic_info') - basic_info = self._analyze_basic_info_with_progress(y, sr, path, basic_callback, **kwargs) + basic_info = self._analyze_basic_info_with_progress( + y, sr, path, basic_callback, **kwargs + ) basic_callback.complete() - + key = basic_info.get('key') max_workers = self._get_current_max_workers() - + # Use more aggressive parallelization for better performance - effective_workers = min(max_workers * 3, 24) # Triple the workers for analysis tasks - + effective_workers = min( + max_workers * 3, 24 + ) # Triple the workers for analysis tasks + # Parallel analysis execution with ThreadPoolExecutor(max_workers=effective_workers) as executor: futures = { 'chord_progression': executor.submit( - self._analyze_with_progress, 'chord_progression', - progress_manager, self.chord_analyzer.analyze, y, sr, key, float(basic_info.get('bpm', 130.0)) + self._analyze_with_progress, + 'chord_progression', + progress_manager, + self.chord_analyzer.analyze, + y, + sr, + key, + float(basic_info.get('bpm', 130.0)), ), 'structure': executor.submit( - self._analyze_with_progress, 'structure', - progress_manager, self._analyze_structure_with_progress, y, sr, float(basic_info.get('bpm', 130.0)) + self._analyze_with_progress, + 'structure', + progress_manager, + self._analyze_structure_with_progress, + y, + sr, + float(basic_info.get('bpm', 130.0)), ), 'rhythm': executor.submit( - self._analyze_with_progress, 'rhythm', - progress_manager, self.rhythm_analyzer.analyze, y, sr + self._analyze_with_progress, + 'rhythm', + progress_manager, + self.rhythm_analyzer.analyze, + y, + sr, ), 'timbre': executor.submit( - self._analyze_with_progress, 'timbre', - progress_manager, self.timbre_analyzer.analyze, y, sr + self._analyze_with_progress, + 'timbre', + progress_manager, + self.timbre_analyzer.analyze, + y, + sr, ), 'melody_harmony': executor.submit( - self._analyze_with_progress, 'melody_harmony', - progress_manager, self.melody_harmony_analyzer.analyze, y, sr + self._analyze_with_progress, + 'melody_harmony', + progress_manager, + self.melody_harmony_analyzer.analyze, + y, + sr, ), 'dynamics': executor.submit( - self._analyze_with_progress, 'dynamics', - progress_manager, self.dynamics_analyzer.analyze, y, sr - ) + self._analyze_with_progress, + 'dynamics', + progress_manager, + self.dynamics_analyzer.analyze, + y, + sr, + ), } - + # Collect results results = {'basic_info': basic_info} - + for task_id, future in futures.items(): try: results[task_id] = future.result(timeout=None) # No timeout @@ -271,35 +323,35 @@ def _parallel_comprehensive_analysis( print(f"❌ Warning: Error in {task_id} analysis: {e}") progress_manager.complete_task(task_id, False, str(e)) results[task_id] = {} - + # Generate additional features try: feature_vector = self.similarity_engine.extract_feature_vector(results) results["similarity_features"] = { "feature_vector": feature_vector.tolist(), - "feature_weights": self.similarity_engine.feature_weights + "feature_weights": self.similarity_engine.feature_weights, } - + results["reference_tags"] = self._generate_reference_tags(results) results["production_notes"] = self._generate_production_notes(results) - + except Exception as e: print(f"Warning: Error 
generating additional features: {e}") - + return results - + def _parallel_analyze_multiple_files( - self, - paths: List[str], - comprehensive: bool, - config: ParallelConfig, + self, + paths: List[str], + comprehensive: bool, + config: ParallelConfig, progress_callback: Optional[Callable] = None, - **kwargs + **kwargs, ) -> Dict[str, Dict[str, Any]]: """Parallel analysis of multiple files.""" - + max_workers = config.max_workers - + if config.use_process_pool: # Process parallelization for multiple files with ProcessPoolExecutor(max_workers=max_workers) as executor: @@ -309,69 +361,67 @@ def _parallel_analyze_multiple_files( ) for path in paths } - + results = {} completed = 0 - + for path, future in futures.items(): try: results[path] = future.result() completed += 1 - + if progress_callback: progress_callback( int(100 * completed / len(paths)), - f"Completed {completed}/{len(paths)} files" + f"Completed {completed}/{len(paths)} files", ) except Exception as e: results[path] = {'error': str(e)} completed += 1 - + return results else: # Thread parallelization with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = { - path: executor.submit( - self.analyze_file, path, **kwargs - ) + path: executor.submit(self.analyze_file, path, **kwargs) for path in paths } - + results = {} completed = 0 - + for path, future in futures.items(): try: results[path] = future.result() completed += 1 - + if progress_callback: progress_callback( int(100 * completed / len(paths)), - f"Completed {completed}/{len(paths)} files" + f"Completed {completed}/{len(paths)} files", ) except Exception as e: results[path] = {'error': str(e)} completed += 1 - + return results - + def _analyze_with_progress( - self, - task_id: str, - progress_manager: ProgressManager, - analyzer_func: Callable, - *args, - **kwargs + self, + task_id: str, + progress_manager: ProgressManager, + analyzer_func: Callable, + *args, + **kwargs, ): """Execute analysis with progress tracking.""" callback = ProgressCallback(progress_manager, task_id) profile_data = self.performance_profiler.start_profiling(task_id) - + try: callback(0, "Starting...") - + # Special handling for detailed progress analyzers if task_id == 'structure': # Extract arguments: y, sr, bpm @@ -385,51 +435,55 @@ def _analyze_with_progress( # Enhanced progress for other analyzers with callback injection callback(10, "Initializing...") callback(25, "Processing audio...") - + # Try to inject progress callback into analyzer function try: # Check if analyzer accepts progress_callback parameter import inspect + sig = inspect.signature(analyzer_func) if 'progress_callback' in sig.parameters: - result = analyzer_func(*args, progress_callback=callback, **kwargs) + result = analyzer_func( + *args, progress_callback=callback, **kwargs + ) else: result = analyzer_func(*args, **kwargs) callback(75, "Processing completed...") - except Exception as e: + except Exception: # Fallback to original call result = analyzer_func(*args, **kwargs) callback(75, "Processing completed...") - + callback(90, "Finalizing results...") - + callback.complete(True) self.performance_profiler.end_profiling(profile_data) return result - + except Exception as e: callback.complete(False, str(e)) self.performance_profiler.end_profiling(profile_data) raise - + def _analyze_basic_info_with_progress(self, y, sr, path, callback, **kwargs): """Basic analysis with progress tracking.""" callback(5, "Starting basic analysis...") - + callback(10, "Calculating duration...") duration = len(y) / sr callback(15, 
"Analyzing BPM...") - + bpm, bpm_conf, top_bpms, top_hits = self.bpm_detector.detect( - y, sr, + y, + sr, kwargs.get('min_bpm', 40.0), kwargs.get('max_bpm', 300.0), - kwargs.get('start_bpm', 150.0) + kwargs.get('start_bpm', 150.0), ) callback(50, "BPM analysis completed") - + callback(60, "Detecting key...") - + key = None key_conf = 0.0 if kwargs.get('detect_key', True): @@ -437,9 +491,9 @@ def _analyze_basic_info_with_progress(self, y, sr, path, callback, **kwargs): callback(90, "Key detection completed") else: callback(90, "Key detection skipped") - + callback(100, "Basic analysis completed") - + return { "filename": path, "duration": duration, @@ -447,30 +501,32 @@ def _analyze_basic_info_with_progress(self, y, sr, path, callback, **kwargs): "bpm_confidence": bpm_conf, "bpm_candidates": list(zip(top_bpms, top_hits)), "key": key, - "key_confidence": key_conf + "key_confidence": key_conf, } - + def _analyze_structure_with_progress(self, y, sr, callback, bpm=130.0): """Structure analysis with detailed progress.""" callback(5, "Starting structure analysis...") - + callback(10, "Extracting features...") features = self.structure_analyzer.extract_structural_features(y, sr) callback(25, "Features extracted") - + callback(30, "Computing similarity matrix...") - similarity_matrix = self.structure_analyzer.compute_self_similarity_matrix(features) + similarity_matrix = self.structure_analyzer.compute_self_similarity_matrix( + features + ) callback(50, "Similarity matrix computed") - + callback(60, "Detecting boundaries...") - boundaries = self.structure_analyzer.detect_boundaries(similarity_matrix, sr, bpm=bpm) + self.structure_analyzer.detect_boundaries(similarity_matrix, sr, bpm=bpm) callback(75, "Boundaries detected") - + callback(80, "Analyzing structure...") structure_result = self.structure_analyzer.analyze(y, sr, bpm) sections = structure_result['sections'] callback(90, "Structure analyzed") - + callback(90, "Using form analysis from structure result...") # Use form analysis from the complete structure analysis form_analysis = { @@ -478,128 +534,136 @@ def _analyze_structure_with_progress(self, y, sr, callback, bpm=130.0): 'repetition_ratio': structure_result['repetition_ratio'], 'structural_complexity': structure_result['structural_complexity'], 'section_count': structure_result['section_count'], - 'unique_sections': structure_result['unique_sections'] + 'unique_sections': structure_result['unique_sections'], } - + callback(95, "Analyzing section-wise chord progressions...") # Enhance sections with chord progression information enhanced_sections = self._analyze_section_chord_progressions(y, sr, sections) - + callback(98, "Form analysis completed") - + return { 'sections': enhanced_sections, 'form': form_analysis['form'], 'repetition_ratio': form_analysis['repetition_ratio'], 'structural_complexity': form_analysis['structural_complexity'], 'section_count': form_analysis['section_count'], - 'unique_sections': form_analysis.get('unique_sections', 0) + 'unique_sections': form_analysis.get('unique_sections', 0), } - + def _analyze_section_chord_progressions(self, y, sr, sections): """Analyze chord progressions for each section.""" enhanced_sections = [] - + for section in sections: # Extract audio segment for this section start_sample = int(section['start_time'] * sr) end_sample = int(section['end_time'] * sr) segment = y[start_sample:end_sample] - + if len(segment) > sr: # Only analyze segments longer than 1 second try: # Extract chroma features for this segment chroma = 
self.chord_analyzer.extract_chroma_features(segment, sr) - + # Detect chords for this segment segment_chords = self.chord_analyzer.detect_chords(chroma) - + # Get the most common chord progression for this section if segment_chords: # Take the most frequent chords (simplified) - unique_chords = list(set(segment_chords))[:4] # Max 4 chords per section - section_progression = ' → '.join(unique_chords) if unique_chords else 'Unknown' + unique_chords = list(set(segment_chords))[ + :4 + ] # Max 4 chords per section + section_progression = ( + ' → '.join(unique_chords) if unique_chords else 'Unknown' + ) else: section_progression = 'Unknown' - + except Exception: section_progression = 'Unknown' else: section_progression = 'Unknown' - + # Add chord progression to section info enhanced_section = section.copy() enhanced_section['chord_progression'] = section_progression enhanced_sections.append(enhanced_section) - + return enhanced_sections - + def _analyze_chords_with_progress(self, y, sr, key, bpm, callback): """Chord analysis with detailed progress.""" callback(10, "Starting chord analysis...") - + callback(20, "Extracting chroma features...") chroma = self.chord_analyzer.extract_chroma_features(y, sr) callback(40, "Chroma features extracted") - + callback(50, "Detecting chords...") chords = self.chord_analyzer.detect_chords(chroma, bpm) callback(70, "Chords detected") - + callback(80, "Analyzing progression...") progression_analysis = self.chord_analyzer.analyze_progression(chords) callback(95, "Progression analysis completed") - + return { 'chords': chords, 'main_progression': progression_analysis['main_progression'], 'chord_complexity': progression_analysis['chord_complexity'], - 'harmonic_rhythm': progression_analysis['harmonic_rhythm'] + 'harmonic_rhythm': progression_analysis['harmonic_rhythm'], } - + def _should_use_parallel(self) -> bool: """Check if parallel processing should be used.""" - if not self.auto_parallel or not self._parallel_config or not self._parallel_config.enable_parallel: + if ( + not self.auto_parallel + or not self._parallel_config + or not self._parallel_config.enable_parallel + ): return False - + # Dynamic load check if self.system_monitor.monitoring: return not self.system_monitor.should_reduce_parallelism() - + return True - + def _get_current_max_workers(self) -> int: """Get current recommended worker count.""" if not self._parallel_config: return 1 - + base_workers = self._parallel_config.max_workers - + if self.system_monitor.monitoring: return self.system_monitor.get_recommended_workers(base_workers) - + return base_workers - + def _get_progress_message(self, progress_manager: ProgressManager) -> str: """Generate progress message.""" status = progress_manager.get_status_summary() running_tasks = progress_manager.get_running_tasks() - + if running_tasks: return f"Running: {', '.join(running_tasks[:2])}" else: return f"Completed: {status['completed']}/{status['total']} tasks" - + def _analyze_single_file_worker(self, path: str, comprehensive: bool, **kwargs): """Worker function for process-based parallelization.""" # Create new analyzer instance for process isolation analyzer = AudioAnalyzer(sr=self.sr, hop_length=self.hop_length) return analyzer.analyze_file(path, **kwargs) - + def get_performance_summary(self) -> Dict[str, Any]: """Get performance profiling summary.""" return self.performance_profiler.get_performance_summary() - + def __del__(self): """Cleanup resources.""" try: @@ -608,11 +672,11 @@ def __del__(self): except Exception: # Ignore errors during 
cleanup pass - + def cleanup(self): """Explicit cleanup method for graceful shutdown.""" try: if hasattr(self, 'system_monitor') and self.system_monitor: self.system_monitor.stop_monitoring() except Exception: - pass \ No newline at end of file + pass diff --git a/src/bpm_detector/section_analyzer.py b/src/bpm_detector/section_analyzer.py index 33d94a0..cc5af69 100644 --- a/src/bpm_detector/section_analyzer.py +++ b/src/bpm_detector/section_analyzer.py @@ -7,61 +7,62 @@ class SectionAnalyzer: """Analyzes and detects musical section characteristics.""" - + # ASCII label definitions (J-Pop terminology) JP_ASCII_LABELS = { - "intro": "Intro", - "verse": "A-melo", - "pre_chorus": "B-melo", - "chorus": "Sabi", - "bridge": "C-melo", + "intro": "Intro", + "verse": "A-melo", + "pre_chorus": "B-melo", + "chorus": "Sabi", + "bridge": "C-melo", "instrumental": "Kansou", - "break": "Break", - "interlude": "Interlude", - "solo": "Solo", - "spoken": "Serifu", - "outro": "Outro", + "break": "Break", + "interlude": "Interlude", + "solo": "Solo", + "spoken": "Serifu", + "outro": "Outro", } - + def __init__(self, hop_length: int = 512): """Initialize section analyzer. - + Args: hop_length: Hop length for analysis """ self.hop_length = hop_length - - def refine_section_labels_with_spectral_analysis(self, y: np.ndarray, sr: int, - sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + + def refine_section_labels_with_spectral_analysis( + self, y: np.ndarray, sr: int, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Refine section labels using spectral flux analysis. - + Args: y: Audio signal sr: Sample rate sections: List of sections to refine - + Returns: Refined sections list """ refined_sections = [] - + for section in sections: start_sample = int(section['start_time'] * sr) end_sample = int(section['end_time'] * sr) segment = y[start_sample:end_sample] - + if len(segment) == 0: refined_sections.append(section) continue - + # Calculate spectral flux (measure of spectral change) stft = librosa.stft(segment, hop_length=self.hop_length) - spectral_flux = np.sum(np.diff(np.abs(stft), axis=1)**2, axis=0) + spectral_flux = np.sum(np.diff(np.abs(stft), axis=1) ** 2, axis=0) avg_flux = np.mean(spectral_flux) if len(spectral_flux) > 0 else 0 - + # Refine section type based on spectral characteristics refined_type = section['type'] - + # Get characteristics with defaults - handle both dict and list formats characteristics = section.get('characteristics', {}) if isinstance(characteristics, list): @@ -70,11 +71,11 @@ def refine_section_labels_with_spectral_analysis(self, y: np.ndarray, sr: int, 'energy': 0.5, 'spectral_complexity': 0.5, 'harmonic_content': 0.5, - 'rhythmic_density': 0.5 + 'rhythmic_density': 0.5, } energy_level = characteristics.get('energy', 0.5) complexity = characteristics.get('spectral_complexity', 0.5) - + # If classified as outro but has high spectral activity, reclassify if section['type'] == 'outro' and avg_flux > 0.1: if energy_level > 0.5: @@ -83,38 +84,43 @@ def refine_section_labels_with_spectral_analysis(self, y: np.ndarray, sr: int, refined_type = 'bridge' else: refined_type = 'verse' - + # If classified as bridge but has low complexity, might be verse elif section['type'] == 'bridge' and complexity < 0.4: refined_type = 'verse' - + # Enhanced instrumental section classification elif section['type'] == 'instrumental': spectral_features = { 'spectral_centroid': np.array([2000.0]), 'spectral_rolloff': np.array([4000.0]), - 'mfcc': np.random.randn(13, 1) + 'mfcc': 
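                    # placeholder only: random values stand in for real MFCCs
                    # so the expected (13, 1) feature shape is satisfied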
np.random.randn(13, 1), } - refined_type = self.classify_instrumental_subtype(section, spectral_features) - + refined_type = self.classify_instrumental_subtype( + section, spectral_features + ) + # Create refined section refined_section = section.copy() refined_section['type'] = refined_type - refined_section['ascii_label'] = self.JP_ASCII_LABELS.get(refined_type, refined_type) + refined_section['ascii_label'] = self.JP_ASCII_LABELS.get( + refined_type, refined_type + ) refined_section['spectral_flux'] = float(avg_flux) - + refined_sections.append(refined_section) - + return refined_sections - - def classify_instrumental_subtype(self, section: Dict[str, Any], - spectral_features: Dict[str, Any] = None) -> str: + + def classify_instrumental_subtype( + self, section: Dict[str, Any], spectral_features: Dict[str, Any] = None + ) -> str: """Classify instrumental sections into more specific subtypes. - + Args: section: Section information spectral_features: Spectral features dictionary (optional) - + Returns: Refined instrumental section type """ @@ -126,229 +132,251 @@ def classify_instrumental_subtype(self, section: Dict[str, Any], 'energy': 0.5, 'spectral_complexity': 0.5, 'harmonic_content': 0.5, - 'rhythmic_density': 0.5 + 'rhythmic_density': 0.5, } energy = characteristics.get('energy', 0.5) complexity = characteristics.get('spectral_complexity', 0.5) - harmonic_content = characteristics.get('harmonic_content', 0.5) rhythmic_density = characteristics.get('rhythmic_density', 0.5) - - duration = section.get('duration', section.get('end_time', 0) - section.get('start_time', 0)) - start_time = section.get('start_time', 0) - + + duration = section.get( + 'duration', section.get('end_time', 0) - section.get('start_time', 0) + ) + # Classify based on position, energy, complexity, and duration - + # Break: Low energy, low complexity, short duration if energy < 0.4 and complexity < 0.4 and duration < 15: return 'breakdown' - + # Solo: High energy, high complexity, medium duration elif energy > 0.6 and complexity > 0.7 and 10 < duration < 30: return 'solo' - + # Interlude: Medium energy, medium-high complexity, longer duration elif 0.4 <= energy <= 0.7 and complexity > 0.5 and duration > 15: return 'interlude' - + # Buildup: High energy, increasing complexity elif energy > 0.7 and rhythmic_density > 0.8: return 'buildup' - + # Default to solo for high-energy sections else: return 'solo' - - def enhance_outro_detection(self, sections: List[Dict[str, Any]], - y: np.ndarray, sr: int) -> List[Dict[str, Any]]: + + def enhance_outro_detection( + self, sections: List[Dict[str, Any]], y: np.ndarray, sr: int + ) -> List[Dict[str, Any]]: """Enhanced outro detection with fade analysis and harmonic resolution. 
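
        Only sections starting within the final 15% of the track are
        considered; a section is relabelled as an outro when it fades out, or
        when it resolves to the tonic while its energy level stays below 0.5.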
- + Args: sections: List of sections to analyze y: Audio signal sr: Sample rate - + Returns: Sections with improved outro detection """ if not sections: return sections - + enhanced_sections = sections.copy() total_duration = sections[-1]['end_time'] - + # Analyze last 15% of the track for outro candidates outro_threshold_time = total_duration * 0.85 - + for i, section in enumerate(enhanced_sections): if section['start_time'] >= outro_threshold_time: # Check for fade/silence ending is_fade_ending = self.detect_fade_ending(section, y, sr) - + # Check for harmonic resolution (tonic return) - has_harmonic_resolution = self.detect_harmonic_resolution(section, y, sr) - + has_harmonic_resolution = self.detect_harmonic_resolution( + section, y, sr + ) + # Reclassify as outro if conditions are met - if is_fade_ending or (has_harmonic_resolution and section['energy_level'] < 0.5): + if is_fade_ending or ( + has_harmonic_resolution and section['energy_level'] < 0.5 + ): enhanced_sections[i]['type'] = 'outro' - enhanced_sections[i]['ascii_label'] = self.JP_ASCII_LABELS.get('outro', 'outro') - enhanced_sections[i]['outro_confidence'] = 0.8 if is_fade_ending else 0.6 - + enhanced_sections[i]['ascii_label'] = self.JP_ASCII_LABELS.get( + 'outro', 'outro' + ) + enhanced_sections[i]['outro_confidence'] = ( + 0.8 if is_fade_ending else 0.6 + ) + return enhanced_sections - - def detect_fade_ending(self, section: Dict[str, Any], y: np.ndarray, sr: int) -> bool: + + def detect_fade_ending( + self, section: Dict[str, Any], y: np.ndarray, sr: int + ) -> bool: """Detect fade/silence ending pattern. - + Args: section: Section to analyze y: Audio signal sr: Sample rate - + Returns: True if fade ending is detected """ start_sample = int(section['start_time'] * sr) end_sample = int(section['end_time'] * sr) segment = y[start_sample:end_sample] - + if len(segment) < sr: # Less than 1 second return False - + # Analyze energy in 10-second moving windows duration = section.get('duration', section['end_time'] - section['start_time']) window_duration = min(10.0, duration / 2) window_samples = int(window_duration * sr) - + if len(segment) < window_samples * 2: return False - + # Calculate RMS energy for first and last windows first_window = segment[:window_samples] last_window = segment[-window_samples:] - + first_rms = np.sqrt(np.mean(first_window**2)) last_rms = np.sqrt(np.mean(last_window**2)) - + # Convert to dB if first_rms > 0 and last_rms > 0: db_drop = 20 * np.log10(last_rms / first_rms) - + # Check for significant fade (6-12dB drop) if db_drop < -6.0: return True - + # Check for voiced ratio (vocal presence) voiced_frames = 0 total_frames = 0 - + # Simple voiced detection using zero crossing rate frame_length = 2048 for i in range(0, len(segment) - frame_length, frame_length // 2): - frame = segment[i:i + frame_length] + frame = segment[i : i + frame_length] zcr = np.sum(np.diff(np.sign(frame)) != 0) / len(frame) - + # Low ZCR typically indicates voiced content if zcr < 0.1: voiced_frames += 1 total_frames += 1 - + voiced_ratio = voiced_frames / max(total_frames, 1) - + # Low voiced ratio indicates instrumental/fade ending return voiced_ratio < 0.15 - - def detect_harmonic_resolution(self, section: Dict[str, Any], - y: np.ndarray, sr: int) -> bool: + + def detect_harmonic_resolution( + self, section: Dict[str, Any], y: np.ndarray, sr: int + ) -> bool: """Detect harmonic resolution to tonic (key return). 
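
        The average chroma over the final 30% of the section is inspected; the
        strongest pitch class is taken as the tonic candidate, and resolution
        is reported when it is at least 1.5x stronger than every other pitch
        class and above 0.5 in absolute strength.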
- + Args: section: Section to analyze y: Audio signal sr: Sample rate - + Returns: True if harmonic resolution is detected """ start_sample = int(section['start_time'] * sr) end_sample = int(section['end_time'] * sr) segment = y[start_sample:end_sample] - + if len(segment) < sr: # Less than 1 second return False - + # Extract chroma features for harmonic analysis chroma = librosa.feature.chroma_stft(y=segment, sr=sr, hop_length=512) - + # Analyze final bars for tonic presence final_portion = 0.3 # Last 30% of section final_start = int(chroma.shape[1] * (1 - final_portion)) final_chroma = chroma[:, final_start:] - + if final_chroma.shape[1] == 0: return False - + # Calculate average chroma in final portion avg_final_chroma = np.mean(final_chroma, axis=1) - + # Find the most prominent pitch class (potential tonic) tonic_candidate = np.argmax(avg_final_chroma) tonic_strength = avg_final_chroma[tonic_candidate] - + # Check if tonic is significantly stronger than other notes - other_strengths = np.concatenate([ - avg_final_chroma[:tonic_candidate], - avg_final_chroma[tonic_candidate+1:] - ]) - + other_strengths = np.concatenate( + [ + avg_final_chroma[:tonic_candidate], + avg_final_chroma[tonic_candidate + 1 :], + ] + ) + if len(other_strengths) > 0: max_other = np.max(other_strengths) tonic_dominance = tonic_strength / (max_other + 1e-8) - + # Strong tonic presence indicates resolution return bool(tonic_dominance > 1.5 and tonic_strength > 0.5) - + return False - - def detect_chorus_hooks(self, sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + + def detect_chorus_hooks( + self, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Detect and enforce chorus sections based on hook patterns. - + Args: sections: List of sections to process - + Returns: Processed sections with chorus hooks detected """ if not sections: return sections - + processed = sections.copy() - + for i, section in enumerate(processed): # Hook pattern detection: high energy + brightness + 6-10 bar duration energy_level = section.get('energy_level', 0.0) brightness = section.get('brightness', 0.0) # May not be available - duration = section.get('duration', section['end_time'] - section['start_time']) - + duration = section.get( + 'duration', section['end_time'] - section['start_time'] + ) + # Strong hook pattern criteria - is_hook = (energy_level > 0.65 and - 6 <= duration <= 10 and - section['type'] in ['verse', 'bridge', 'pre_chorus']) # Convert these to chorus - + is_hook = ( + energy_level > 0.65 + and 6 <= duration <= 10 + and section['type'] in ['verse', 'bridge', 'pre_chorus'] + ) # Convert these to chorus + # Additional brightness check if available if brightness > 0.0: # If brightness data is available is_hook = is_hook and brightness > 0.6 - + if is_hook: processed[i]['type'] = 'chorus' - processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get('chorus', 'chorus') - + processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get( + 'chorus', 'chorus' + ) + return processed - + def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: """Analyze overall musical form. 
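
        Worked example: sections typed intro, verse, chorus, verse, chorus,
        outro map to the form string "IABABO"; with 4 unique types among 6
        sections the repetition ratio is 1 - 4/6 ≈ 0.33.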
- + Args: sections: List of classified sections - + Returns: Form analysis results """ @@ -358,23 +386,25 @@ def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: 'repetition_ratio': 0.0, 'structure_complexity': 0.0, 'section_count': 0, - 'total_duration': 0.0 + 'total_duration': 0.0, } - + # Extract section types section_types = [section['type'] for section in sections] - + # Create form string form = ''.join([self._section_to_letter(stype) for stype in section_types]) - + # Calculate repetition ratio unique_sections = len(set(section_types)) total_sections = len(section_types) - repetition_ratio = 1.0 - (unique_sections / total_sections) if total_sections > 0 else 0.0 - + repetition_ratio = ( + 1.0 - (unique_sections / total_sections) if total_sections > 0 else 0.0 + ) + # Calculate structural complexity structural_complexity = self._calculate_structural_complexity(sections) - + # Calculate total duration if sections: last_section = sections[-1] @@ -387,7 +417,7 @@ def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: total_duration = sum(s.get('duration', 10.0) for s in sections) else: total_duration = 0.0 - + return { 'form': form, 'repetition_ratio': repetition_ratio, @@ -396,51 +426,53 @@ def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: 'section_count': total_sections, 'total_duration': total_duration, 'unique_sections': unique_sections, - 'section_types': section_types + 'section_types': section_types, } - + def _section_to_letter(self, section_type: str) -> str: """Convert section type to letter for form notation. - + Args: section_type: Section type string - + Returns: Single letter representing the section """ mapping = { - 'intro': 'I', # Intro - 'verse': 'A', # A-melo (Verse) - 'pre_chorus': 'R', # B-melo (Pre-Chorus) - 'chorus': 'B', # Sabi (Chorus) - 'bridge': 'C', # C-melo (Bridge) - 'instrumental': 'D', # Kansou (Instrumental) - 'break': 'K', # Break - 'interlude': 'L', # Interlude - 'solo': 'S', # Solo - 'spoken': 'P', # Serifu (Spoken word/dialogue) - 'outro': 'O' # Outro + 'intro': 'I', # Intro + 'verse': 'A', # A-melo (Verse) + 'pre_chorus': 'R', # B-melo (Pre-Chorus) + 'chorus': 'B', # Sabi (Chorus) + 'bridge': 'C', # C-melo (Bridge) + 'instrumental': 'D', # Kansou (Instrumental) + 'break': 'K', # Break + 'interlude': 'L', # Interlude + 'solo': 'S', # Solo + 'spoken': 'P', # Serifu (Spoken word/dialogue) + 'outro': 'O', # Outro } return mapping.get(section_type, 'X') - + def _calculate_structural_complexity(self, sections: List[Dict[str, Any]]) -> float: """Calculate structural complexity score. - + Args: sections: List of sections - + Returns: Complexity score (0-1) """ if not sections: return 0.0 - + # Factors contributing to complexity: # 1. Number of different section types section_types = [s['type'] for s in sections] unique_types = len(set(section_types)) - type_complexity = min(1.0, unique_types / 6.0) # Normalize by max expected types - + type_complexity = min( + 1.0, unique_types / 6.0 + ) # Normalize by max expected types + # 2. Variation in section durations durations = [] for s in sections: @@ -452,47 +484,55 @@ def _calculate_structural_complexity(self, sections: List[Dict[str, Any]]) -> fl durations.append(10.0) # Default duration if neither available duration_std = np.std(durations) / (np.mean(durations) + 1e-8) duration_complexity = min(1.0, duration_std) - + # 3. 
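For intuition, a tiny worked example of the form string and repetition ratio computed above; the section list is invented and the letter mapping mirrors the table:

letters = {'intro': 'I', 'verse': 'A', 'pre_chorus': 'R', 'chorus': 'B', 'outro': 'O'}
types = ['intro', 'verse', 'pre_chorus', 'chorus', 'verse', 'pre_chorus', 'chorus', 'outro']
form = ''.join(letters.get(t, 'X') for t in types)
repetition_ratio = 1.0 - len(set(types)) / len(types)
print(form, repetition_ratio)  # IARBARBO 0.375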
Non-standard form patterns form_complexity = 0.0 if len(sections) > 8: # Long form form_complexity += 0.3 if unique_types > 4: # Many section types form_complexity += 0.3 - + # Combine factors - overall_complexity = (type_complexity + duration_complexity + form_complexity) / 3.0 - + overall_complexity = ( + type_complexity + duration_complexity + form_complexity + ) / 3.0 + return min(1.0, overall_complexity) - + def summarize_sections(self, sections: List[Dict[str, Any]]) -> str: """Generate a summary text of sections for display. - + Args: sections: List of sections - + Returns: Summary text string """ lines = [] lines.append(f"Section List (Estimated {len(sections)} sections)") - tmpl = " {idx:>2}. {typ:<8}({ascii}) {start:>6.1f}s - {end:>6.1f}s ({dur:>5.1f}s)" + tmpl = ( + " {idx:>2}. {typ:<8}({ascii}) {start:>6.1f}s - {end:>6.1f}s ({dur:>5.1f}s)" + ) for i, s in enumerate(sections, 1): duration = s.get('duration', s['end_time'] - s['start_time']) - lines.append(tmpl.format(idx=i, - typ=s['type'].capitalize(), - ascii=s.get('ascii_label', s['type']), - start=s['start_time'], - end=s['end_time'], - dur=duration)) + lines.append( + tmpl.format( + idx=i, + typ=s['type'].capitalize(), + ascii=s.get('ascii_label', s['type']), + start=s['start_time'], + end=s['end_time'], + dur=duration, + ) + ) return "\n".join(lines) - + def calculate_energy_scale(self, y: np.ndarray) -> Dict[str, float]: """Calculate adaptive energy scale based on track characteristics. - + Args: y: Audio signal - + Returns: Dictionary with energy statistics """ @@ -500,36 +540,30 @@ def calculate_energy_scale(self, y: np.ndarray) -> Dict[str, float]: rms_values = [] window_size = 2048 hop_size = 1024 - + for i in range(0, len(y) - window_size, hop_size): - window = y[i:i + window_size] + window = y[i : i + window_size] rms = np.sqrt(np.mean(window**2)) if rms > 0: # Avoid zero values rms_values.append(rms) - + if rms_values: # Use 10th and 90th percentiles for robust scaling p10 = np.percentile(rms_values, 10) p90 = np.percentile(rms_values, 90) mean_energy = np.mean(rms_values) - + # Set energy scale based on dynamic range - if p90 > p10: - energy_scale_value = (p10 + p90) / 2.0 - else: - energy_scale_value = 0.05 # Fallback - return { 'min_energy': float(p10), 'max_energy': float(p90), 'mean_energy': float(mean_energy), - 'energy_range': float(p90 - p10) + 'energy_range': float(p90 - p10), } else: - energy_scale_value = 0.05 # Fallback return { 'min_energy': 0.0, 'max_energy': 0.05, 'mean_energy': 0.025, - 'energy_range': 0.05 - } \ No newline at end of file + 'energy_range': 0.05, + } diff --git a/src/bpm_detector/section_processor.py b/src/bpm_detector/section_processor.py index 9f4610e..28fb1d7 100644 --- a/src/bpm_detector/section_processor.py +++ b/src/bpm_detector/section_processor.py @@ -9,43 +9,46 @@ class SectionProcessor: """Post-processes and refines musical sections.""" - + # ASCII label definitions (J-Pop terminology) JP_ASCII_LABELS = { - "intro": "Intro", - "verse": "A-melo", - "pre_chorus": "B-melo", - "chorus": "Sabi", - "bridge": "C-melo", + "intro": "Intro", + "verse": "A-melo", + "pre_chorus": "B-melo", + "chorus": "Sabi", + "bridge": "C-melo", "instrumental": "Kansou", - "break": "Break", - "interlude": "Interlude", - "solo": "Solo", - "spoken": "Serifu", - "outro": "Outro", + "break": "Break", + "interlude": "Interlude", + "solo": "Solo", + "spoken": "Serifu", + "outro": "Outro", } - + # Instrumental aliases (Solo, Interlude, etc.) 
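A short sketch of the percentile-based energy scaling above, using fabricated frame RMS values; taking the 10th/90th percentiles keeps the scale robust against spikes and near-silence:

import numpy as np

rng = np.random.default_rng(0)
rms_values = np.abs(rng.normal(0.1, 0.05, size=1000)) + 1e-4  # fake frame RMS
p10 = float(np.percentile(rms_values, 10))
p90 = float(np.percentile(rms_values, 90))
print(p10 < p90, round(p90 - p10, 3))  # robust energy_range estimate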
to be normalized _INSTRUMENTAL_ALIASES = {"solo", "interlude", "buildup", "breakdown"} - + def __init__(self, hop_length: int = 512): """Initialize section processor. - + Args: hop_length: Hop length for analysis """ self.hop_length = hop_length self.jpop_optimizer = JPopStructureOptimizer() self.analyzer = SectionAnalyzer(hop_length) - - def post_process_sections(self, raw: List[Dict[str, Any]], - total_duration: float = None, - merge_threshold: float = None, - bpm: float = 130.0, - y: np.ndarray = None, - sr: int = None) -> List[Dict[str, Any]]: + + def post_process_sections( + self, + raw: List[Dict[str, Any]], + total_duration: float = None, + merge_threshold: float = None, + bpm: float = 130.0, + y: np.ndarray = None, + sr: int = None, + ) -> List[Dict[str, Any]]: """Enhanced post-process sections with fade detection and outro refinement. - + Args: raw: List of raw sections total_duration: Total duration of the audio (optional) @@ -53,22 +56,25 @@ def post_process_sections(self, raw: List[Dict[str, Any]], bpm: BPM for duration calculations y: Audio signal for fade detection (optional) sr: Sample rate for fade detection (optional) - + Returns: List of processed sections """ # Handle backward compatibility - if total_duration is actually bpm (old signature) - if isinstance(total_duration, (int, float)) and merge_threshold is None and len(raw) > 0: + if ( + isinstance(total_duration, (int, float)) + and merge_threshold is None + and len(raw) > 0 + ): # Old signature: post_process_sections(sections, bpm) bpm = total_duration total_duration = None - - + # Add duration field to sections if missing for section in raw: if 'duration' not in section: section['duration'] = section['end_time'] - section['start_time'] - + # Use merge_threshold if provided, otherwise calculate based on BPM if merge_threshold is not None: min_dur = merge_threshold @@ -76,12 +82,14 @@ def post_process_sections(self, raw: List[Dict[str, Any]], else: # Calculate 4-bar duration as minimum section length (stricter) four_bar_duration = (16 * 60.0) / bpm # 16 beats = 4 bars - min_dur = max(6.0, four_bar_duration) # Full 4 bars, minimum 6 seconds (stricter) + min_dur = max( + 6.0, four_bar_duration + ) # Full 4 bars, minimum 6 seconds (stricter) + # Also calculate 2-bar threshold for very short segments two_bar_duration = (8 * 60.0) / bpm # 8 beats = 2 bars very_short_thresh = max(3.0, two_bar_duration) - + # FIRST: Apply strict chorus chain limitation BEFORE any merging chorus_limited = [] consecutive_chorus_count = 0 @@ -98,13 +106,15 @@ def post_process_sections(self, raw: List[Dict[str, Any]], else: consecutive_chorus_count = 0 chorus_limited.append(sec) - + # SECOND: Break consecutive chorus chains and restore instrumentals if y is not None and sr is not None: - enhanced = self.jpop_optimizer.break_consecutive_chorus_chains(chorus_limited, y, sr, bpm) + enhanced = self.jpop_optimizer.break_consecutive_chorus_chains( + chorus_limited, y, sr, bpm + ) else: enhanced = chorus_limited.copy() - + # THIRD: merge adjacent same types (but preserve intentional breaks and instrumental sections) merged = [] for seg in enhanced: @@ -114,20 +124,27 @@ def post_process_sections(self, raw: List[Dict[str, Any]], # Never merge restored instrumentals (those with vocal_ratio) if seg['type'] == 'instrumental' and seg.get('vocal_ratio') is not None: should_merge = False - elif merged[-1]['type'] == 'instrumental' and merged[-1].get('vocal_ratio') is not None: + elif ( + merged[-1]['type'] == 'instrumental' + and
merged[-1].get('vocal_ratio') is not None + ): should_merge = False else: # Check for time gap (if gap > 1 second, don't merge) time_gap = seg['start_time'] - merged[-1]['end_time'] if abs(time_gap) <= 1.0: # Allow small gaps due to rounding should_merge = True - + if should_merge: # Merge sections of same type merged[-1]['end_time'] = seg['end_time'] - merged[-1]['duration'] = merged[-1]['end_time'] - merged[-1]['start_time'] + merged[-1]['duration'] = ( + merged[-1]['end_time'] - merged[-1]['start_time'] + ) # Update ASCII label to match merged type - merged[-1]['ascii_label'] = self.JP_ASCII_LABELS.get(merged[-1]['type'], merged[-1]['type']) + merged[-1]['ascii_label'] = self.JP_ASCII_LABELS.get( + merged[-1]['type'], merged[-1]['type'] + ) else: merged.append(seg) @@ -142,12 +159,14 @@ def post_process_sections(self, raw: List[Dict[str, Any]], if len(enhanced) > 0: # Always merge very short segments into previous enhanced[-1]['end_time'] = seg['end_time'] - enhanced[-1]['duration'] = enhanced[-1]['end_time'] - enhanced[-1]['start_time'] + enhanced[-1]['duration'] = ( + enhanced[-1]['end_time'] - enhanced[-1]['start_time'] + ) continue - + # Enhanced D absorption rule: Always absorb 8 bars after Chorus/Verse/Bridge eight_bar_duration = (8 * 4 * 60.0) / bpm # 8 bars duration - + if seg['type'] == 'instrumental': # Protect restored instrumentals from absorption if seg.get('vocal_ratio') is not None: @@ -155,7 +174,7 @@ def post_process_sections(self, raw: List[Dict[str, Any]], continue should_absorb = False - + # Check if this instrumental follows Chorus/Verse/Bridge if len(enhanced) > 0: prev_type = enhanced[-1]['type'] @@ -166,93 +185,111 @@ def post_process_sections(self, raw: List[Dict[str, Any]], # Also absorb short instrumentals (original logic) elif seg['duration'] < min_dur: should_absorb = True - + if should_absorb: enhanced[-1]['end_time'] = seg['end_time'] - enhanced[-1]['duration'] = enhanced[-1]['end_time'] - enhanced[-1]['start_time'] + enhanced[-1]['duration'] = ( + enhanced[-1]['end_time'] - enhanced[-1]['start_time'] + ) continue - + # Forward merging for remaining short instrumentals if seg['duration'] < min_dur and i + 1 < len(merged): next_seg = merged[i + 1] if next_seg['type'] in ['chorus', 'verse']: # Extend next section to include this instrumental next_seg['start_time'] = seg['start_time'] - next_seg['duration'] = next_seg['end_time'] - next_seg['start_time'] + next_seg['duration'] = ( + next_seg['end_time'] - next_seg['start_time'] + ) continue - + # Standard short segment absorption (4-bar minimum) - but protect restored instrumentals - should_absorb = (seg['duration'] < min_dur and len(enhanced) > 0) - + should_absorb = seg['duration'] < min_dur and len(enhanced) > 0 + # Protect restored instrumentals (those with vocal_ratio information) if seg['type'] == 'instrumental' and seg.get('vocal_ratio') is not None: should_absorb = False - + # Also protect if previous section would absorb an instrumental - if (len(enhanced) > 0 and enhanced[-1]['type'] == 'chorus' and - seg['type'] == 'instrumental' and seg.get('vocal_ratio') is not None): + if ( + len(enhanced) > 0 + and enhanced[-1]['type'] == 'chorus' + and seg['type'] == 'instrumental' + and seg.get('vocal_ratio') is not None + ): should_absorb = False - + if should_absorb: # Absorb short segment into previous section enhanced[-1]['end_time'] = seg['end_time'] - enhanced[-1]['duration'] = enhanced[-1]['end_time'] - enhanced[-1]['start_time'] - + enhanced[-1]['duration'] = ( + enhanced[-1]['end_time'] - 
enhanced[-1]['start_time'] ) + # If absorbing changes the character significantly, update type - if seg['duration'] > min_dur * 0.3: # If absorbed segment is substantial + if ( + seg['duration'] > min_dur * 0.3 + ): # If absorbed segment is substantial # Keep the type of the longer segment if seg['duration'] > enhanced[-1]['duration'] * 0.5: # Absorbed segment is significant, consider hybrid classification prev_type = enhanced[-1]['type'] curr_type = seg['type'] - + # Apply smart merging rules - merged_type = self._smart_merge_types(prev_type, curr_type, - enhanced[-1]['duration'], seg['duration']) + merged_type = self._smart_merge_types( + prev_type, + curr_type, + enhanced[-1]['duration'], + seg['duration'], + ) enhanced[-1]['type'] = merged_type - enhanced[-1]['ascii_label'] = self.JP_ASCII_LABELS.get(merged_type, merged_type) + enhanced[-1]['ascii_label'] = self.JP_ASCII_LABELS.get( + merged_type, merged_type + ) else: enhanced.append(seg) - + # Enhanced outro detection with fade analysis if y is not None and sr is not None and len(enhanced) > 0: enhanced = self.analyzer.enhance_outro_detection(enhanced, y, sr) - + # Apply R→B pairing rules first enhanced = self.jpop_optimizer.enforce_pre_chorus_chorus_pairing(enhanced) - + # Apply Pre-Chorus consecutive suppression filter AFTER pairing (order changed) enhanced = self.jpop_optimizer.suppress_consecutive_pre_chorus(enhanced) - + # Apply short bridge downgrade (< 12 bars → verse) enhanced = self._downgrade_short_bridges(enhanced, bpm) - + # Apply ending instrumental cleanup enhanced = self._cleanup_ending_instrumentals(enhanced, bpm) - + # Apply chorus hook detection enhanced = self.analyzer.detect_chorus_hooks(enhanced) - + # Collapse A-R alternating patterns enhanced = self.jpop_optimizer.collapse_alternating_ar_patterns(enhanced) - + # === ⑥ oversized pre_chorus split ============================== - MAX_R_BARS = 16 # R longer than 16 bars (≈32s) is forcibly split into Verse+R - max_r_sec = MAX_R_BARS * 4 * 60 / bpm + MAX_R_BARS = 16 # R longer than 16 bars (≈32s) is forcibly split into Verse+R + max_r_sec = MAX_R_BARS * 4 * 60 / bpm fixed = [] for sec in enhanced: if sec["type"] == "pre_chorus" and sec["duration"] > max_r_sec: # Calculate how many segments we need num_segments = int(np.ceil(sec["duration"] / max_r_sec)) segment_duration = sec["duration"] / num_segments - + current_start = sec["start_time"] for i in range(num_segments): segment = sec.copy() segment["start_time"] = current_start segment["end_time"] = current_start + segment_duration segment["duration"] = segment_duration - + # First segment becomes verse, others remain pre_chorus if i == 0: segment["type"] = "verse" @@ -260,7 +297,7 @@ def post_process_sections(self, raw: List[Dict[str, Any]], else: segment["type"] = "pre_chorus" segment["ascii_label"] = self.JP_ASCII_LABELS["pre_chorus"] - + fixed.append(segment) current_start += segment_duration else: @@ -269,18 +306,21 @@ def post_process_sections(self, raw: List[Dict[str, Any]], # === ⑦ oversize A/R split (Verse & Pre-Chorus) =========== SPLIT_BARS = 16 - split_sec = SPLIT_BARS * 4 * 60 / bpm + split_sec = SPLIT_BARS * 4 * 60 / bpm final = [] for sec in enhanced: if sec["type"] in ["verse", "pre_chorus"] and sec["duration"] > split_sec: - mid = sec["start_time"] + sec["duration"]/2 - first = sec.copy(); last = sec.copy() - first["end_time"] = mid; first["duration"] = mid - first["start_time"] + mid = sec["start_time"] + sec["duration"] / 2 + first = sec.copy() + last = sec.copy() + first["end_time"] = mid + first["duration"] = mid - first["start_time"] # First half becomes Verse
first["type"] = "verse" first["ascii_label"] = self.JP_ASCII_LABELS["verse"] # Second half becomes Pre-Chorus - last["start_time"] = mid; last["duration"] = sec["end_time"] - mid + last["start_time"] = mid + last["duration"] = sec["end_time"] - mid last["type"] = "pre_chorus" last["ascii_label"] = self.JP_ASCII_LABELS["pre_chorus"] final.extend([first, last]) @@ -296,22 +336,30 @@ def post_process_sections(self, raw: List[Dict[str, Any]], sec["type"] = "instrumental" sec["ascii_label"] = self.JP_ASCII_LABELS["instrumental"] # merge consecutive instrumentals - if merged2 and sec["type"] == "instrumental" and merged2[-1]["type"] == "instrumental": + if ( + merged2 + and sec["type"] == "instrumental" + and merged2[-1]["type"] == "instrumental" + ): merged2[-1]["end_time"] = sec["end_time"] - merged2[-1]["duration"] = merged2[-1]["end_time"] - merged2[-1]["start_time"] + merged2[-1]["duration"] = ( + merged2[-1]["end_time"] - merged2[-1]["start_time"] + ) else: merged2.append(sec) enhanced = merged2 # === ⑨ too-short Chorus fix (<8bars) ===================== - min_chorus = (8 * 4 * 60) / bpm # 8 bars + min_chorus = (8 * 4 * 60) / bpm # 8 bars patched = [] for i, sec in enumerate(enhanced): if sec["type"] == "chorus" and sec["duration"] < min_chorus: # Prefer absorbing into an immediately preceding R if patched and patched[-1]["type"] == "pre_chorus": patched[-1]["end_time"] = sec["end_time"] - patched[-1]["duration"] = patched[-1]["end_time"] - patched[-1]["start_time"] + patched[-1]["duration"] = ( + patched[-1]["end_time"] - patched[-1]["start_time"] + ) continue # Otherwise treat it as Instrumental sec["type"] = "instrumental" @@ -347,22 +395,26 @@ def post_process_sections(self, raw: List[Dict[str, Any]], # Re-run B-D-B merging after all processing enhanced = self._merge_chorus_instrumental_chorus(enhanced, bpm) - + # Final pass: Ensure all ASCII labels are consistent for section in enhanced: - section['ascii_label'] = self.JP_ASCII_LABELS.get(section['type'], section['type']) + section['ascii_label'] = self.JP_ASCII_LABELS.get( + section['type'], section['type'] + ) + return enhanced - def _smart_merge_types(self, type1: str, type2: str, dur1: float, dur2: float) -> str: + def _smart_merge_types( + self, type1: str, type2: str, dur1: float, dur2: float + ) -> str: """Smart merging of section types based on musical logic.
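To make the ⑥/⑦ split thresholds concrete, a hedged back-of-the-envelope check; the 44-second run is invented:

import math

bpm = 130.0
max_r_sec = 16 * 4 * 60 / bpm               # MAX_R_BARS = 16 -> about 29.5 s
num_segments = math.ceil(44.0 / max_r_sec)  # an over-long pre_chorus run
print(round(max_r_sec, 1), num_segments)    # 29.5 2 -> split into Verse + R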
- + Args: type1: First section type type2: Second section type dur1: Duration of first section dur2: Duration of second section - + Returns: Merged section type """ @@ -371,7 +423,7 @@ def _smart_merge_types(self, type1: str, type2: str, dur1: float, dur2: float) - return type1 elif dur2 > dur1 * 2: return type2 - + # Special handling for short instrumental sections (likely fills/transitions) if type2 == 'instrumental' and dur2 < 8: # Short instrumental # Merge short instrumental into previous section @@ -379,83 +431,99 @@ def _smart_merge_types(self, type1: str, type2: str, dur1: float, dur2: float) - elif type1 == 'instrumental' and dur1 < 8: # Short instrumental # Merge short instrumental into following section return type2 - + # Apply musical logic for merging merge_rules = { - ('verse', 'pre_chorus'): 'verse', # Pre-chorus often merges into verse + ('verse', 'pre_chorus'): 'verse', # Pre-chorus often merges into verse ('pre_chorus', 'verse'): 'verse', - ('pre_chorus', 'chorus'): 'chorus', # Pre-chorus leads to chorus + ('pre_chorus', 'chorus'): 'chorus', # Pre-chorus leads to chorus ('chorus', 'pre_chorus'): 'chorus', - ('chorus', 'instrumental'): 'chorus', # Short instrumental after chorus -> chorus - ('instrumental', 'chorus'): 'chorus', # Short instrumental before chorus -> chorus - ('verse', 'bridge'): 'bridge', # Bridge is more distinctive + ( + 'chorus', + 'instrumental', + ): 'chorus', # Short instrumental after chorus -> chorus + ( + 'instrumental', + 'chorus', + ): 'chorus', # Short instrumental before chorus -> chorus + ('verse', 'bridge'): 'bridge', # Bridge is more distinctive ('bridge', 'verse'): 'bridge', ('instrumental', 'break'): 'instrumental', ('break', 'instrumental'): 'instrumental', - ('intro', 'verse'): 'verse', # Intro usually leads to verse - ('verse', 'outro'): 'outro', # Outro is more distinctive + ('intro', 'verse'): 'verse', # Intro usually leads to verse + ('verse', 'outro'): 'outro', # Outro is more distinctive } - + # Check both directions merged = merge_rules.get((type1, type2)) or merge_rules.get((type2, type1)) if merged: return merged - + # Default: prefer the first type return type1 - - def _merge_chorus_instrumental_chorus(self, sections: List[Dict[str, Any]], bpm: float) -> List[Dict[str, Any]]: + + def _merge_chorus_instrumental_chorus( + self, sections: List[Dict[str, Any]], bpm: float + ) -> List[Dict[str, Any]]: """ Collapse patterns like B(8bars)-D(≤8bars)-B(8bars) into a single extended Chorus with maximum length limit. 
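A condensed, hypothetical restatement of the precedence above (not the real method): dominant duration wins first, then the musical merge table, then the first type:

merge_rules = {
    ('verse', 'pre_chorus'): 'verse',
    ('pre_chorus', 'chorus'): 'chorus',
    ('chorus', 'instrumental'): 'chorus',
}

def smart_merge(type1, type2, dur1, dur2):
    if dur1 > dur2 * 2:
        return type1
    if dur2 > dur1 * 2:
        return type2
    return merge_rules.get((type1, type2)) or merge_rules.get((type2, type1)) or type1

print(smart_merge('pre_chorus', 'chorus', 8.0, 9.0))  # chorus (rule table)
print(smart_merge('verse', 'chorus', 30.0, 5.0))      # verse (duration wins)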
- + Args: sections: List of sections to process bpm: BPM for bar calculation - + Returns: Processed sections with B-D-B collapsed (max 16 bars per chorus) """ if len(sections) < 3: return sections - eight_bar = (8 * 4 * 60) / bpm # 8 bars (=14.7s @130.5BPM) - tol = 0.1 * eight_bar # 10% tolerance (strict to prevent super-long chorus) - MAX_BARS = 16 # Maximum 16 bars to prevent super-long sections + eight_bar = (8 * 4 * 60) / bpm # 8 bars (=14.7s @130.5BPM) + tol = 0.1 * eight_bar # 10% tolerance (strict to prevent super-long chorus) + MAX_BARS = 16 # Maximum 16 bars to prevent super-long sections changed = True - while changed: # Recursive/multi-pass for reliable merging + while changed: # Recursive/multi-pass for reliable merging changed = False out, i = [], 0 while i < len(sections): - if (i + 2 < len(sections) + if ( + i + 2 < len(sections) and sections[i]['type'] == 'chorus' - and sections[i+1]['type'] == 'instrumental' - and sections[i+2]['type'] == 'chorus' - and sections[i+1]['duration'] <= eight_bar + tol): - + and sections[i + 1]['type'] == 'instrumental' + and sections[i + 2]['type'] == 'chorus' + and sections[i + 1]['duration'] <= eight_bar + tol + ): + # Don't merge if instrumental was restored from consecutive chorus breaking - instrumental_section = sections[i+1] + instrumental_section = sections[i + 1] if instrumental_section.get('vocal_ratio') is not None: # This instrumental was restored, don't merge it back out.append(sections[i]) i += 1 continue - + # Check merged length - merged_duration = sections[i+2]['end_time'] - sections[i]['start_time'] - max_allowed_duration = MAX_BARS * eight_bar / 8 # 16 bars equivalent time - + merged_duration = ( + sections[i + 2]['end_time'] - sections[i]['start_time'] + ) + max_allowed_duration = ( + MAX_BARS * eight_bar / 8 + ) # 16 bars equivalent time + if merged_duration <= max_allowed_duration: # Execute merging within length limit new_sec = sections[i].copy() - new_sec['end_time'] = sections[i+2]['end_time'] + new_sec['end_time'] = sections[i + 2]['end_time'] new_sec['duration'] = merged_duration # Always reset ASCII label after merge to prevent notation inconsistency - new_sec['ascii_label'] = self.JP_ASCII_LABELS.get(new_sec['type'], new_sec['type']) + new_sec['ascii_label'] = self.JP_ASCII_LABELS.get( + new_sec['type'], new_sec['type'] + ) out.append(new_sec) i += 3 - changed = True # Loop again + changed = True # Loop again else: # Cancel merging if length limit exceeded, keep original 3 sections out.append(sections[i]) @@ -465,60 +533,72 @@ def _merge_chorus_instrumental_chorus(self, sections: List[Dict[str, Any]], bpm: i += 1 sections = out return sections - - def _downgrade_short_bridges(self, sections: List[Dict[str, Any]], bpm: float) -> List[Dict[str, Any]]: + + def _downgrade_short_bridges( + self, sections: List[Dict[str, Any]], bpm: float + ) -> List[Dict[str, Any]]: """Downgrade short bridges (< 12 bars) to verse sections. 
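The bar arithmetic behind the B-D-B collapse, spelled out with the BPM from the inline comment above; values are rounded for illustration:

bpm = 130.5
eight_bar = (8 * 4 * 60) / bpm   # ≈ 14.71 s, matching the comment
tol = 0.1 * eight_bar            # ≈ 1.47 s tolerance on the middle D
max_chorus = 16 * eight_bar / 8  # 16-bar cap ≈ 29.43 s for the merged B
print(round(eight_bar, 2), round(tol, 2), round(max_chorus, 2))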
- + Args: sections: List of sections to process bpm: BPM for bar calculation - + Returns: Processed sections with short bridges downgraded """ if not sections: return sections - + # Calculate 12-bar duration threshold - twelve_bar_duration = (12 * 4 * 60.0) / bpm # 12 bars * 4 beats/bar * 60s/min / bpm - + twelve_bar_duration = ( + 12 * 4 * 60.0 + ) / bpm # 12 bars * 4 beats/bar * 60s/min / bpm + processed = sections.copy() - + for i, section in enumerate(processed): if section['type'] == 'bridge': - duration = section.get('duration', section['end_time'] - section['start_time']) + duration = section.get( + 'duration', section['end_time'] - section['start_time'] + ) complexity = section.get('complexity', 0.0) energy_level = section.get('energy_level', 0.0) - + # Enhanced bridge filtering: duration ≥ 12 bars AND complexity > 0.7 - should_downgrade = (duration < twelve_bar_duration or complexity <= 0.7) - + should_downgrade = duration < twelve_bar_duration or complexity <= 0.7 + if should_downgrade: # Downgrade based on energy level if energy_level > 0.55: processed[i]['type'] = 'pre_chorus' - processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get('pre_chorus', 'pre_chorus') + processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get( + 'pre_chorus', 'pre_chorus' + ) else: processed[i]['type'] = 'verse' - processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get('verse', 'verse') - + processed[i]['ascii_label'] = self.JP_ASCII_LABELS.get( + 'verse', 'verse' + ) + return processed - - def _cleanup_ending_instrumentals(self, sections: List[Dict[str, Any]], bpm: float = 130.0) -> List[Dict[str, Any]]: + + def _cleanup_ending_instrumentals( + self, sections: List[Dict[str, Any]], bpm: float = 130.0 + ) -> List[Dict[str, Any]]: """Clean up short instrumental sections at the end of the track. 
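A worked instance of the downgrade rule above, with invented section values; a 15-second, low-complexity bridge at 130 BPM fails both tests:

bpm = 130.0
twelve_bar = (12 * 4 * 60.0) / bpm  # ≈ 22.2 s at 130 BPM
duration, complexity, energy = 15.0, 0.5, 0.6
if duration < twelve_bar or complexity <= 0.7:
    new_type = 'pre_chorus' if energy > 0.55 else 'verse'
    print(new_type)                 # pre_chorus (energetic downgrade)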
- + Args: sections: List of sections to process bpm: BPM for bar calculation - + Returns: Processed sections with ending instrumentals cleaned up """ if not sections: return sections - + processed = sections.copy() - + # Convert ending instrumental or short verse to outro (≤8 bars) if processed: bar_sec = 4 * 60.0 / bpm # Duration of 1 bar in seconds @@ -528,62 +608,74 @@ def _cleanup_ending_instrumentals(self, sections: List[Dict[str, Any]], bpm: flo last_dur = last.get('duration', last['end_time'] - last['start_time']) # Convert ending instrumental or short verse to outro (≤8 bars) - if (last['type'] in ['instrumental', 'verse'] - and last_dur <= eight_bar_sec): + if last['type'] in ['instrumental', 'verse'] and last_dur <= eight_bar_sec: last['type'] = 'outro' last['ascii_label'] = self.JP_ASCII_LABELS['outro'] - + return processed # Delegate methods to analyzer - def refine_section_labels_with_spectral_analysis(self, y: np.ndarray, sr: int, - sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def refine_section_labels_with_spectral_analysis( + self, y: np.ndarray, sr: int, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Refine section labels using spectral flux analysis.""" - return self.analyzer.refine_section_labels_with_spectral_analysis(y, sr, sections) - + return self.analyzer.refine_section_labels_with_spectral_analysis( + y, sr, sections + ) + def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: """Analyze overall musical form.""" return self.analyzer.analyze_form(sections) - + def summarize_sections(self, sections: List[Dict[str, Any]]) -> str: """Generate a summary text of sections for display.""" return self.analyzer.summarize_sections(sections) - + def calculate_energy_scale(self, y: np.ndarray) -> Dict[str, float]: """Calculate adaptive energy scale based on track characteristics.""" return self.analyzer.calculate_energy_scale(y) - + # Backward compatibility methods (delegate to appropriate modules) def _section_to_letter(self, section_type: str) -> str: """Convert section type to letter for form notation.""" return self.analyzer._section_to_letter(section_type) - + def _calculate_structural_complexity(self, sections: List[Dict[str, Any]]) -> float: """Calculate structural complexity score.""" return self.analyzer._calculate_structural_complexity(sections) - - def _classify_instrumental_subtype(self, section: Dict[str, Any], - spectral_features: Dict[str, Any] = None) -> str: + + def _classify_instrumental_subtype( + self, section: Dict[str, Any], spectral_features: Dict[str, Any] = None + ) -> str: """Classify instrumental sections into more specific subtypes.""" return self.analyzer.classify_instrumental_subtype(section, spectral_features) - - def _suppress_consecutive_pre_chorus(self, sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + + def _suppress_consecutive_pre_chorus( + self, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Suppress consecutive pre-chorus sections to prevent over-segmentation.""" return self.jpop_optimizer.suppress_consecutive_pre_chorus(sections) - - def _enforce_pre_chorus_chorus_pairing(self, sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + + def _enforce_pre_chorus_chorus_pairing( + self, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Enforce Pre-Chorus → Chorus pairing rules for J-Pop structure.""" return self.jpop_optimizer.enforce_pre_chorus_chorus_pairing(sections) - - def _collapse_alternating_ar_patterns(self, sections: List[Dict[str, Any]]) -> 
List[Dict[str, Any]]: + + def _collapse_alternating_ar_patterns( + self, sections: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Collapse A-R alternating patterns to A-R-B structure.""" return self.jpop_optimizer.collapse_alternating_ar_patterns(sections) - - def _break_consecutive_chorus_chains(self, sections: List[Dict[str, Any]], - y: np.ndarray, sr: int, bpm: float) -> List[Dict[str, Any]]: + + def _break_consecutive_chorus_chains( + self, sections: List[Dict[str, Any]], y: np.ndarray, sr: int, bpm: float + ) -> List[Dict[str, Any]]: """Break up consecutive chorus chains and restore instrumentals.""" return self.jpop_optimizer.break_consecutive_chorus_chains(sections, y, sr, bpm) - - def _detect_vocal_presence(self, y: np.ndarray, sr: int, start_time: float, end_time: float) -> float: + + def _detect_vocal_presence( + self, y: np.ndarray, sr: int, start_time: float, end_time: float + ) -> float: """Detect vocal presence ratio in a given time segment.""" - return self.jpop_optimizer._detect_vocal_presence(y, sr, start_time, end_time) \ No newline at end of file + return self.jpop_optimizer._detect_vocal_presence(y, sr, start_time, end_time) diff --git a/src/bpm_detector/structure_analyzer.py b/src/bpm_detector/structure_analyzer.py index 9d4f0d2..ad2525d 100644 --- a/src/bpm_detector/structure_analyzer.py +++ b/src/bpm_detector/structure_analyzer.py @@ -1,7 +1,7 @@ """Musical structure analysis module - Main coordinator.""" import numpy as np -from typing import List, Tuple, Dict, Any +from typing import List, Dict, Any from .boundary_detector import BoundaryDetector from .section_classifier import SectionClassifier @@ -10,7 +10,7 @@ class StructureAnalyzer: """Analyzes musical structure and form - Main coordinator.""" - + # Enhanced section types with more detailed categories for J-Pop analysis SECTION_TYPES = { 'intro': {'energy_range': (0.1, 0.4), 'complexity_range': (0.1, 0.5)}, @@ -23,84 +23,94 @@ class StructureAnalyzer: 'break': {'energy_range': (0.2, 0.5), 'complexity_range': (0.2, 0.5)}, 'interlude': {'energy_range': (0.4, 0.7), 'complexity_range': (0.5, 0.8)}, 'solo': {'energy_range': (0.5, 0.9), 'complexity_range': (0.6, 1.0)}, - 'spoken': {'energy_range': (0.1, 0.4), 'complexity_range': (0.1, 0.4)} + 'spoken': {'energy_range': (0.1, 0.4), 'complexity_range': (0.1, 0.4)}, } - + # ASCII label definitions (J-Pop terminology) JP_ASCII_LABELS = { - "intro": "Intro", - "verse": "A-melo", - "pre_chorus": "B-melo", - "chorus": "Sabi", - "bridge": "C-melo", + "intro": "Intro", + "verse": "A-melo", + "pre_chorus": "B-melo", + "chorus": "Sabi", + "bridge": "C-melo", "instrumental": "Kansou", - "break": "Break", - "interlude": "Interlude", - "solo": "Solo", - "spoken": "Serifu", - "outro": "Outro", + "break": "Break", + "interlude": "Interlude", + "solo": "Solo", + "spoken": "Serifu", + "outro": "Outro", } - + def __init__(self, hop_length: int = 512, frame_size: int = 4096): """Initialize structure analyzer. - + Args: hop_length: Hop length for analysis frame_size: Frame size for feature extraction """ self.hop_length = hop_length self.frame_size = frame_size - + # Initialize component analyzers self.boundary_detector = BoundaryDetector(hop_length) self.section_classifier = SectionClassifier(hop_length) self.section_processor = SectionProcessor(hop_length) - + def analyze(self, y: np.ndarray, sr: int, bpm: float = 130.0) -> Dict[str, Any]: """Perform complete structural analysis. 
- + Args: y: Audio signal sr: Sample rate bpm: BPM for dynamic segment length calculation - + Returns: Complete structural analysis results """ # Calculate adaptive energy scale for this track - energy_scale_info = self.section_processor.calculate_energy_scale(y) + self.section_processor.calculate_energy_scale(y) # Note: _energy_scale attribute is no longer used in the new modular design - + # Extract features features = self.boundary_detector.extract_structural_features(y, sr) - + # Compute self-similarity matrix - similarity_matrix = self.boundary_detector.compute_self_similarity_matrix(features) - + similarity_matrix = self.boundary_detector.compute_self_similarity_matrix( + features + ) + # Detect boundaries with dynamic segment length - boundaries = self.boundary_detector.detect_boundaries(similarity_matrix, sr, bpm=bpm) - + boundaries = self.boundary_detector.detect_boundaries( + similarity_matrix, sr, bpm=bpm + ) + # Beat snap alignment boundaries = self.boundary_detector.snap_to_beat(boundaries, sr, bpm) - + # Classify sections with similarity matrix for verse repetition detection - sections = self.section_classifier.classify_sections(y, sr, boundaries, similarity_matrix) - + sections = self.section_classifier.classify_sections( + y, sr, boundaries, similarity_matrix + ) + # Merge and denoise with enhanced processing including fade detection - sections = self.section_processor.post_process_sections(sections, bpm=bpm, y=y, sr=sr) - + sections = self.section_processor.post_process_sections( + sections, bpm=bpm, y=y, sr=sr + ) + # Refine section labels using spectral analysis - sections = self.section_processor.refine_section_labels_with_spectral_analysis(y, sr, sections) - + sections = self.section_processor.refine_section_labels_with_spectral_analysis( + y, sr, sections + ) + # Analyze form form_analysis = self.section_processor.analyze_form(sections) - + # Detect repetitions repetitions = self.boundary_detector.detect_repetitions(similarity_matrix, sr) - + # Generate summary text summary_txt = self.section_processor.summarize_sections(sections) - + return { 'sections': sections, 'section_summary': summary_txt, @@ -110,37 +120,50 @@ def analyze(self, y: np.ndarray, sr: int, bpm: float = 130.0) -> Dict[str, Any]: 'section_count': form_analysis.get('section_count', len(sections)), 'unique_sections': form_analysis.get('unique_sections', 0), 'repetitions': repetitions, - 'boundaries': [b * self.hop_length / sr for b in boundaries] + 'boundaries': [b * self.hop_length / sr for b in boundaries], } - + # Convenience methods for direct access to component functionality - def extract_structural_features(self, y: np.ndarray, sr: int) -> Dict[str, np.ndarray]: + def extract_structural_features( + self, y: np.ndarray, sr: int + ) -> Dict[str, np.ndarray]: """Extract features for structural analysis.""" return self.boundary_detector.extract_structural_features(y, sr) - - def compute_self_similarity_matrix(self, features: Dict[str, np.ndarray]) -> np.ndarray: + + def compute_self_similarity_matrix( + self, features: Dict[str, np.ndarray] + ) -> np.ndarray: """Compute self-similarity matrix from features.""" return self.boundary_detector.compute_self_similarity_matrix(features) - - def detect_boundaries(self, similarity_matrix: np.ndarray, - sr: int, min_segment_length: float = 12.0, bpm: float = 130.0) -> List[int]: + + def detect_boundaries( + self, + similarity_matrix: np.ndarray, + sr: int, + min_segment_length: float = 12.0, + bpm: float = 130.0, + ) -> List[int]: """Detect structural 
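For reference, the frame-to-seconds conversion used for the returned boundaries, with made-up frame indices:

hop_length, sr = 512, 22050
boundary_frames = [0, 430, 1290]  # hypothetical boundary frame indices
boundary_times = [f * hop_length / sr for f in boundary_frames]
print([round(s, 2) for s in boundary_times])  # [0.0, 9.98, 29.95]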
boundaries using novelty detection.""" - return self.boundary_detector.detect_boundaries(similarity_matrix, sr, min_segment_length, bpm) - - def classify_sections(self, y: np.ndarray, sr: int, - boundaries: List[int], bpm: float = 130.0) -> List[Dict[str, Any]]: + return self.boundary_detector.detect_boundaries( + similarity_matrix, sr, min_segment_length, bpm + ) + + def classify_sections( + self, y: np.ndarray, sr: int, boundaries: List[int], bpm: float = 130.0 + ) -> List[Dict[str, Any]]: """Classify sections based on their characteristics.""" return self.section_classifier.classify_sections(y, sr, boundaries) - + def analyze_form(self, sections: List[Dict[str, Any]]) -> Dict[str, Any]: """Analyze overall musical form.""" return self.section_processor.analyze_form(sections) - - def detect_repetitions(self, similarity_matrix: np.ndarray, - sr: int) -> List[Dict[str, Any]]: + + def detect_repetitions( + self, similarity_matrix: np.ndarray, sr: int + ) -> List[Dict[str, Any]]: """Detect repeated sections in the music.""" return self.boundary_detector.detect_repetitions(similarity_matrix, sr) - + def summarize_sections(self, sections: List[Dict[str, Any]]) -> str: """Generate a summary text of sections for display.""" - return self.section_processor.summarize_sections(sections) \ No newline at end of file + return self.section_processor.summarize_sections(sections) diff --git a/tests/test_melody_harmony_analyzer.py b/tests/test_melody_harmony_analyzer.py index 7cfd6d8..f746ded 100644 --- a/tests/test_melody_harmony_analyzer.py +++ b/tests/test_melody_harmony_analyzer.py @@ -2,7 +2,7 @@ import unittest import numpy as np -from unittest.mock import patch, MagicMock +from unittest.mock import patch from src.bpm_detector.melody_harmony_analyzer import MelodyHarmonyAnalyzer @@ -15,86 +15,95 @@ def setUp(self): hop_length=512, fmin=80.0, fmax=2000.0, - consonance_ratings={0: 1.0, 7: 0.8, 4: 0.7, 3: 0.6} + consonance_ratings={0: 1.0, 7: 0.8, 4: 0.7, 3: 0.6}, ) self.sr = 22050 - + # Create synthetic audio with harmonic content duration = 5 # seconds t = np.linspace(0, duration, int(self.sr * duration)) - + # Create a chord-like signal (C major: C-E-G) fundamental = 261.63 # C4 self.harmonic_audio = ( - 0.5 * np.sin(2 * np.pi * fundamental * t) + # C - 0.3 * np.sin(2 * np.pi * fundamental * 5/4 * t) + # E - 0.2 * np.sin(2 * np.pi * fundamental * 3/2 * t) # G + 0.5 * np.sin(2 * np.pi * fundamental * t) # C + + 0.3 * np.sin(2 * np.pi * fundamental * 5 / 4 * t) # E + + 0.2 * np.sin(2 * np.pi * fundamental * 3 / 2 * t) # G ) - + # Create a melodic signal melody_freqs = [261.63, 293.66, 329.63, 349.23] # C-D-E-F - self.melodic_audio = np.concatenate([ - 0.5 * np.sin(2 * np.pi * freq * np.linspace(0, 1, self.sr)) - for freq in melody_freqs - ]) + self.melodic_audio = np.concatenate( + [ + 0.5 * np.sin(2 * np.pi * freq * np.linspace(0, 1, self.sr)) + for freq in melody_freqs + ] + ) @patch('src.bpm_detector.melody_analyzer.MelodyAnalyzer.extract_melody') - @patch('src.bpm_detector.harmony_analyzer.HarmonyAnalyzer.analyze_harmony_complexity') + @patch( + 'src.bpm_detector.harmony_analyzer.HarmonyAnalyzer.analyze_harmony_complexity' + ) @patch('src.bpm_detector.harmony_analyzer.HarmonyAnalyzer.analyze_consonance') @patch('src.bpm_detector.harmony_analyzer.HarmonyAnalyzer.analyze_harmonic_rhythm') - def test_analyze_complete(self, mock_harmonic_rhythm, mock_consonance, - mock_harmony_complexity, mock_extract_melody): + def test_analyze_complete( + self, + mock_harmonic_rhythm, + mock_consonance, + 
mock_harmony_complexity, + mock_extract_melody, + ): """Test complete melody harmony analysis.""" # Mock melody analyzer results mock_extract_melody.return_value = { 'f0': np.array([261.63, 293.66, 329.63, 349.23]), 'voiced_flag': np.array([True, True, True, True]), - 'voiced_prob': np.array([0.9, 0.8, 0.85, 0.9]) + 'voiced_prob': np.array([0.9, 0.8, 0.85, 0.9]), } - + # Mock harmony analyzer results mock_harmony_complexity.return_value = { 'harmonic_complexity': 0.65, 'spectral_entropy': 0.7, - 'harmonic_change_rate': 0.3 + 'harmonic_change_rate': 0.3, } - + mock_consonance.return_value = { 'consonance_score': 0.75, 'dissonance_score': 0.25, - 'interval_consonance': 0.8 + 'interval_consonance': 0.8, } - + mock_harmonic_rhythm.return_value = { 'harmonic_rhythm': 2.5, 'chord_change_rate': 0.4, - 'harmonic_stability': 0.6 + 'harmonic_stability': 0.6, } - + # Run analysis results = self.analyzer.analyze(self.harmonic_audio, self.sr) - + # Check structure self.assertIsInstance(results, dict) - + # Check main sections expected_sections = ['melody', 'harmony', 'combined_features'] for section in expected_sections: self.assertIn(section, results) - + # Check melody section melody_results = results['melody'] self.assertIn('range', melody_results) self.assertIn('direction', melody_results) self.assertIn('intervals', melody_results) self.assertIn('stability', melody_results) - + # Check harmony section harmony_results = results['harmony'] self.assertIn('complexity', harmony_results) self.assertIn('consonance', harmony_results) self.assertIn('rhythm', harmony_results) - + # Check combined features combined = results['combined_features'] self.assertIn('melody_harmony_balance', combined) @@ -108,24 +117,23 @@ def test_analyze_with_progress_callback(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([261.63, 293.66]), 'voiced_flag': np.array([True, True]), - 'voiced_prob': np.array([0.9, 0.8]) + 'voiced_prob': np.array([0.9, 0.8]), } - + # Track progress calls progress_calls = [] - + def progress_callback(progress, message): progress_calls.append((progress, message)) - + # Run analysis with callback - results = self.analyzer.analyze( - self.harmonic_audio, self.sr, - progress_callback=progress_callback + self.analyzer.analyze( + self.harmonic_audio, self.sr, progress_callback=progress_callback ) - + # Check that progress was reported self.assertGreater(len(progress_calls), 0) - + # Check progress values are reasonable for progress, message in progress_calls: self.assertGreaterEqual(progress, 0.0) @@ -139,15 +147,15 @@ def test_analyze_melodic_audio(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([261.63, 293.66, 329.63, 349.23]), 'voiced_flag': np.array([True, True, True, True]), - 'voiced_prob': np.array([0.9, 0.8, 0.85, 0.9]) + 'voiced_prob': np.array([0.9, 0.8, 0.85, 0.9]), } - + results = self.analyzer.analyze(self.melodic_audio, self.sr) - + # Should detect melodic content self.assertIsInstance(results, dict) self.assertIn('melody', results) - + # Melody should have reasonable range melody_range = results['melody']['range'] self.assertIn('range_semitones', melody_range) @@ -160,15 +168,15 @@ def test_analyze_harmonic_audio(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([261.63, 261.63, 261.63, 261.63]), # Stable pitch 'voiced_flag': np.array([True, True, True, True]), - 'voiced_prob': np.array([0.9, 0.9, 0.9, 0.9]) + 'voiced_prob': np.array([0.9, 0.9, 0.9, 0.9]), } - + results = 
self.analyzer.analyze(self.harmonic_audio, self.sr) - + # Should detect harmonic content self.assertIsInstance(results, dict) self.assertIn('harmony', results) - + # Harmony should show complexity harmony_complexity = results['harmony']['complexity'] self.assertIn('harmonic_complexity', harmony_complexity) @@ -181,9 +189,9 @@ def test_initialization_with_custom_parameters(self): hop_length=256, fmin=100.0, fmax=1500.0, - consonance_ratings={0: 1.0, 5: 0.9, 7: 0.8} + consonance_ratings={0: 1.0, 5: 0.9, 7: 0.8}, ) - + # Check that parameters are set self.assertEqual(custom_analyzer.hop_length, 256) self.assertEqual(custom_analyzer.fmin, 100.0) @@ -193,7 +201,7 @@ def test_initialization_with_custom_parameters(self): def test_initialization_with_defaults(self): """Test analyzer initialization with default parameters.""" default_analyzer = MelodyHarmonyAnalyzer() - + # Check that defaults are reasonable self.assertEqual(default_analyzer.hop_length, 512) self.assertEqual(default_analyzer.fmin, 80.0) @@ -207,11 +215,11 @@ def test_empty_audio_handling(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([]), 'voiced_flag': np.array([]), - 'voiced_prob': np.array([]) + 'voiced_prob': np.array([]), } - + empty_audio = np.array([]) - + try: results = self.analyzer.analyze(empty_audio, self.sr) # Should handle empty input gracefully @@ -227,14 +235,16 @@ def test_short_audio_handling(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([261.63]), 'voiced_flag': np.array([True]), - 'voiced_prob': np.array([0.9]) + 'voiced_prob': np.array([0.9]), } - + # Create 0.5 second audio - short_audio = 0.5 * np.sin(2 * np.pi * 440 * np.linspace(0, 0.5, int(self.sr * 0.5))) - + short_audio = 0.5 * np.sin( + 2 * np.pi * 440 * np.linspace(0, 0.5, int(self.sr * 0.5)) + ) + results = self.analyzer.analyze(short_audio, self.sr) - + # Should handle short audio self.assertIsInstance(results, dict) self.assertIn('melody', results) @@ -247,20 +257,22 @@ def test_noise_audio_handling(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([0, 0, 261.63, 0]), # Sparse melody 'voiced_flag': np.array([False, False, True, False]), - 'voiced_prob': np.array([0.1, 0.2, 0.8, 0.1]) + 'voiced_prob': np.array([0.1, 0.2, 0.8, 0.1]), } - + # Create noisy audio noise_audio = 0.1 * np.random.randn(self.sr * 2) - + results = self.analyzer.analyze(noise_audio, self.sr) - + # Should handle noisy input self.assertIsInstance(results, dict) - + # Results should reflect low musical content if 'combined_features' in results: - sophistication = results['combined_features'].get('musical_sophistication', 0) + sophistication = results['combined_features'].get( + 'musical_sophistication', 0 + ) self.assertLessEqual(sophistication, 0.5) # Should be low for noise @patch('src.bpm_detector.melody_analyzer.MelodyAnalyzer.extract_melody') @@ -270,26 +282,26 @@ def test_combined_features_calculation(self, mock_extract_melody): mock_extract_melody.return_value = { 'f0': np.array([261.63, 293.66, 329.63]), 'voiced_flag': np.array([True, True, True]), - 'voiced_prob': np.array([0.9, 0.8, 0.85]) + 'voiced_prob': np.array([0.9, 0.8, 0.85]), } - + results = self.analyzer.analyze(self.harmonic_audio, self.sr) - + # Check combined features combined = results['combined_features'] - + # Check balance score self.assertIn('melody_harmony_balance', combined) balance = combined['melody_harmony_balance'] self.assertGreaterEqual(balance, 0.0) self.assertLessEqual(balance, 1.0) - + # Check 
overall complexity self.assertIn('overall_complexity', combined) complexity = combined['overall_complexity'] self.assertGreaterEqual(complexity, 0.0) self.assertLessEqual(complexity, 1.0) - + # Check musical sophistication self.assertIn('musical_sophistication', combined) sophistication = combined['musical_sophistication'] @@ -298,14 +310,14 @@ def test_combined_features_calculation(self, mock_extract_melody): def test_progress_callback_error_handling(self): """Test that analysis continues even if progress callback fails.""" + def failing_callback(progress, message): raise Exception("Callback error") - + # Should not crash even with failing callback try: results = self.analyzer.analyze( - self.harmonic_audio, self.sr, - progress_callback=failing_callback + self.harmonic_audio, self.sr, progress_callback=failing_callback ) # Analysis should complete despite callback failure self.assertIsInstance(results, dict) @@ -315,4 +327,4 @@ def failing_callback(progress, message): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_music_analyzer.py b/tests/test_music_analyzer.py index e65286c..1612804 100644 --- a/tests/test_music_analyzer.py +++ b/tests/test_music_analyzer.py @@ -2,7 +2,6 @@ import unittest import numpy as np -import librosa import soundfile as sf import tempfile import os @@ -11,131 +10,140 @@ class TestAudioAnalyzer(unittest.TestCase): """Test cases for AudioAnalyzer.""" - + def setUp(self): """Set up test fixtures.""" self.analyzer = AudioAnalyzer() self.sr = 22050 self.duration = 10.0 # 10 seconds - + # Create a test audio file self.test_audio = self._create_test_audio() - + # Create temporary file self.temp_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) sf.write(self.temp_file.name, self.test_audio, self.sr) self.temp_file.close() - + def tearDown(self): """Clean up test fixtures.""" # Remove temporary file if os.path.exists(self.temp_file.name): os.unlink(self.temp_file.name) - + def _create_test_audio(self): """Create a test audio signal.""" t = np.linspace(0, self.duration, int(self.sr * self.duration)) - + # Create a musical signal with multiple characteristics signal = np.zeros_like(t) - + # Add a chord progression (C - Am - F - G) chord_duration = self.duration / 4 - + # C major chord mask1 = t < chord_duration signal[mask1] += 0.3 * ( - np.sin(2 * np.pi * 261.63 * t[mask1]) + # C - np.sin(2 * np.pi * 329.63 * t[mask1]) + # E - np.sin(2 * np.pi * 392.00 * t[mask1]) # G + np.sin(2 * np.pi * 261.63 * t[mask1]) # C + + np.sin(2 * np.pi * 329.63 * t[mask1]) # E + + np.sin(2 * np.pi * 392.00 * t[mask1]) # G ) - + # A minor chord mask2 = (t >= chord_duration) & (t < 2 * chord_duration) signal[mask2] += 0.3 * ( - np.sin(2 * np.pi * 220.00 * t[mask2]) + # A - np.sin(2 * np.pi * 261.63 * t[mask2]) + # C - np.sin(2 * np.pi * 329.63 * t[mask2]) # E + np.sin(2 * np.pi * 220.00 * t[mask2]) # A + + np.sin(2 * np.pi * 261.63 * t[mask2]) # C + + np.sin(2 * np.pi * 329.63 * t[mask2]) # E ) - + # F major chord mask3 = (t >= 2 * chord_duration) & (t < 3 * chord_duration) signal[mask3] += 0.3 * ( - np.sin(2 * np.pi * 174.61 * t[mask3]) + # F - np.sin(2 * np.pi * 220.00 * t[mask3]) + # A - np.sin(2 * np.pi * 261.63 * t[mask3]) # C + np.sin(2 * np.pi * 174.61 * t[mask3]) # F + + np.sin(2 * np.pi * 220.00 * t[mask3]) # A + + np.sin(2 * np.pi * 261.63 * t[mask3]) # C ) - + # G major chord mask4 = t >= 3 * chord_duration signal[mask4] += 0.3 * ( - np.sin(2 * np.pi * 196.00 * t[mask4]) + # G - np.sin(2 * np.pi * 246.94 
* t[mask4]) + # B - np.sin(2 * np.pi * 293.66 * t[mask4]) # D + np.sin(2 * np.pi * 196.00 * t[mask4]) # G + + np.sin(2 * np.pi * 246.94 * t[mask4]) # B + + np.sin(2 * np.pi * 293.66 * t[mask4]) # D ) - + # Add rhythm (120 BPM = 2 beats per second) beat_freq = 2.0 beat_pattern = (np.sin(2 * np.pi * beat_freq * t) > 0).astype(float) - signal *= (0.7 + 0.3 * beat_pattern) - + signal *= 0.7 + 0.3 * beat_pattern + # Add some noise for realism signal += 0.05 * np.random.randn(len(t)) - + # Normalize signal = signal / np.max(np.abs(signal)) - + return signal - + def test_basic_analysis(self): """Test basic BPM and key analysis.""" results = self.analyzer.analyze_file( - self.temp_file.name, - detect_key=True, - comprehensive=False + self.temp_file.name, detect_key=True, comprehensive=False ) - + # Check basic structure self.assertIn('basic_info', results) basic_info = results['basic_info'] - + # Check required fields - required_fields = ['filename', 'duration', 'bpm', 'bpm_confidence', 'key', 'key_confidence'] + required_fields = [ + 'filename', + 'duration', + 'bpm', + 'bpm_confidence', + 'key', + 'key_confidence', + ] for field in required_fields: self.assertIn(field, basic_info) - + # Check types and ranges self.assertIsInstance(basic_info['bpm'], float) self.assertGreater(basic_info['bpm'], 60) self.assertLess(basic_info['bpm'], 200) - + self.assertIsInstance(basic_info['bpm_confidence'], float) self.assertGreaterEqual(basic_info['bpm_confidence'], 0) self.assertLessEqual(basic_info['bpm_confidence'], 100) - + self.assertIsInstance(basic_info['duration'], float) self.assertAlmostEqual(basic_info['duration'], self.duration, delta=0.5) - + def test_comprehensive_analysis(self): """Test comprehensive analysis.""" results = self.analyzer.analyze_file( - self.temp_file.name, - detect_key=True, - comprehensive=True + self.temp_file.name, detect_key=True, comprehensive=True ) - + # Check basic analysis sections are present basic_sections = ['basic_info', 'chord_progression'] - + for section in basic_sections: self.assertIn(section, results) - + # Check optional sections if present optional_sections = [ - 'rhythm', 'structure', 'timbre', 'melody_harmony', 'dynamics', - 'similarity_features', 'reference_tags', 'production_notes' + 'rhythm', + 'structure', + 'timbre', + 'melody_harmony', + 'dynamics', + 'similarity_features', + 'reference_tags', + 'production_notes', ] - + for section in optional_sections: if section in results: # reference_tags is expected to be a list, others should be dict @@ -143,68 +151,75 @@ def test_comprehensive_analysis(self): self.assertIsInstance(results[section], list) else: self.assertIsInstance(results[section], dict) - + # Check chord progression analysis chord_prog = results['chord_progression'] self.assertIn('main_progression', chord_prog) self.assertIn('harmonic_rhythm', chord_prog) self.assertIn('chord_complexity', chord_prog) - + # Check optional sections if present if 'structure' in results: structure = results['structure'] # Check for any structure fields self.assertIsInstance(structure, dict) - + if 'rhythm' in results: rhythm = results['rhythm'] # Check for any rhythm fields self.assertIsInstance(rhythm, dict) - + if 'timbre' in results: timbre = results['timbre'] # Check for any timbre fields self.assertIsInstance(timbre, dict) - + if 'melody_harmony' in results: melody_harmony = results['melody_harmony'] # Check for any melody harmony fields self.assertIsInstance(melody_harmony, dict) - + if 'dynamics' in results: dynamics = results['dynamics'] # Check for any 
dynamics fields self.assertIsInstance(dynamics, dict) - + if 'similarity_features' in results: similarity_features = results['similarity_features'] # Check for any similarity features self.assertIsInstance(similarity_features, dict) - + def test_generate_reference_tags(self): """Test reference tag generation.""" # Create mock results mock_results = { 'basic_info': {'bpm': 120, 'key': 'C Major'}, - 'rhythm': {'time_signature': '4/4', 'groove_type': 'straight', 'syncopation_level': 0.3}, + 'rhythm': { + 'time_signature': '4/4', + 'groove_type': 'straight', + 'syncopation_level': 0.3, + }, 'structure': {'structural_complexity': 0.5}, 'timbre': { - 'dominant_instruments': [{'instrument': 'piano'}, {'instrument': 'guitar'}], - 'brightness': 0.7 + 'dominant_instruments': [ + {'instrument': 'piano'}, + {'instrument': 'guitar'}, + ], + 'brightness': 0.7, }, - 'dynamics': {'overall_energy': 0.6} + 'dynamics': {'overall_energy': 0.6}, } - + tags = self.analyzer._generate_reference_tags(mock_results) - + # Should return a list of strings self.assertIsInstance(tags, list) self.assertTrue(all(isinstance(tag, str) for tag in tags)) - + # Should contain expected tags based on mock data self.assertIn('upbeat', tags) # 120 BPM self.assertIn('major-key', tags) # C Major - + def test_generate_production_notes(self): """Test production notes generation.""" # Create mock results @@ -214,38 +229,33 @@ def test_generate_production_notes(self): 'dominant_instruments': [ {'instrument': 'guitar'}, {'instrument': 'drums'}, - {'instrument': 'piano'} + {'instrument': 'piano'}, ], - 'brightness': 0.7 + 'brightness': 0.7, }, - 'dynamics': { - 'dynamic_range': {'dynamic_range_db': 15.0} - } + 'dynamics': {'dynamic_range': {'dynamic_range_db': 15.0}}, } - + notes = self.analyzer._generate_production_notes(mock_results) - + # Should return a dictionary self.assertIsInstance(notes, dict) - + # Should contain expected keys self.assertIn('arrangement_density', notes) self.assertIn('production_style', notes) self.assertIn('mix_characteristics', notes) - + def test_generate_reference_sheet(self): """Test reference sheet generation.""" # Use comprehensive analysis results - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + reference_sheet = self.analyzer.generate_reference_sheet(results) - + # Should return a string self.assertIsInstance(reference_sheet, str) - + # Should contain expected sections expected_sections = [ '# Music Production Reference Sheet', @@ -255,54 +265,52 @@ def test_generate_reference_sheet(self): '## Rhythm & Groove', '## Instrumentation & Timbre', '## Melody & Harmony', - '## Dynamics & Energy' + '## Dynamics & Energy', ] - + for section in expected_sections: self.assertIn(section, reference_sheet) - + def test_progress_callback(self): """Test progress callback functionality.""" progress_values = [] - + def progress_callback(value): progress_values.append(value) - - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True, - progress_callback=progress_callback + + self.analyzer.analyze_file( + self.temp_file.name, comprehensive=True, progress_callback=progress_callback ) - + # Should have received progress updates self.assertGreater(len(progress_values), 0) - + # Progress values should be between 0 and 100 for value in progress_values: self.assertGreaterEqual(value, 0) self.assertLessEqual(value, 100) - + # Should end at a reasonable progress value 
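A minimal sketch of the temporary-file fixture pattern these tests rely on, assuming soundfile is installed; the tone and duration are arbitrary:

import os
import tempfile

import numpy as np
import soundfile as sf

sr = 22050
t = np.linspace(0.0, 1.0, sr, endpoint=False)
tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
sf.write(tmp.name, 0.5 * np.sin(2 * np.pi * 440.0 * t), sr)
tmp.close()
# ... exercise an analyzer against tmp.name here ...
os.unlink(tmp.name)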
self.assertGreater(progress_values[-1], 0) - + def test_error_handling(self): """Test error handling with invalid input.""" # Test with non-existent file try: - results = self.analyzer.analyze_file('non_existent_file.wav') + self.analyzer.analyze_file('non_existent_file.wav') # Should either handle gracefully or raise appropriate exception except Exception as e: # Exception should be informative self.assertIsInstance(e, (FileNotFoundError, OSError)) - + def test_analyzer_initialization(self): """Test analyzer initialization.""" # Test with custom parameters custom_analyzer = AudioAnalyzer(sr=44100, hop_length=256) - + self.assertEqual(custom_analyzer.sr, 44100) self.assertEqual(custom_analyzer.hop_length, 256) - + # Check that all sub-analyzers are initialized self.assertIsNotNone(custom_analyzer.bpm_detector) self.assertIsNotNone(custom_analyzer.key_detector) @@ -313,70 +321,63 @@ def test_analyzer_initialization(self): self.assertIsNotNone(custom_analyzer.melody_harmony_analyzer) self.assertIsNotNone(custom_analyzer.dynamics_analyzer) self.assertIsNotNone(custom_analyzer.similarity_engine) - + def test_format_production_notes(self): """Test production notes formatting.""" mock_notes = { 'arrangement_density': 'medium', 'production_style': 'rock_pop', - 'mix_characteristics': ['bright_mix', 'punchy_drums'] + 'mix_characteristics': ['bright_mix', 'punchy_drums'], } - + formatted = self.analyzer._format_production_notes(mock_notes) - + # Should return a string self.assertIsInstance(formatted, str) - + # Should contain the notes information self.assertIn('medium', formatted) self.assertIn('rock_pop', formatted) - + def test_bpm_parameter_passing(self): """Test BPM parameter passing.""" results = self.analyzer.analyze_file( - self.temp_file.name, - min_bpm=100, - max_bpm=140, - start_bpm=120 + self.temp_file.name, min_bpm=100, max_bpm=140, start_bpm=120 ) - + # Should complete without error self.assertIn('basic_info', results) - + # BPM should be within specified range (approximately) detected_bpm = results['basic_info']['bpm'] self.assertGreaterEqual(detected_bpm, 80) # Allow some tolerance self.assertLessEqual(detected_bpm, 160) - def test_new_structure_analysis_features(self): """Test new structure analysis features.""" - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + # Check structure analysis if present if 'structure' not in results: self.skipTest("Structure analysis not available") - + structure = results['structure'] - + # Should have sections with detailed information if 'sections' in structure: sections = structure['sections'] self.assertIsInstance(sections, list) - + # Each section should have required fields for section in sections: self.assertIsInstance(section, dict) expected_fields = ['start_time', 'end_time', 'type'] for field in expected_fields: self.assertIn(field, section) - + # Should have form analysis self.assertIn('form', structure) - + # Should have structural complexity if 'structural_complexity' in structure: complexity = structure['structural_complexity'] @@ -386,40 +387,39 @@ def test_new_structure_analysis_features(self): def test_enhanced_timbre_analysis(self): """Test enhanced timbre analysis features.""" - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + if 'timbre' not in results: self.skipTest("Timbre analysis not 
available") - + timbre = results['timbre'] - + # Check for enhanced timbre features expected_features = ['brightness', 'warmth', 'roughness', 'density'] for feature in expected_features: if feature in timbre: # Accept numpy types as well as Python types - self.assertIsInstance(timbre[feature], (int, float, np.integer, np.floating)) + self.assertIsInstance( + timbre[feature], (int, float, np.integer, np.floating) + ) self.assertGreaterEqual(float(timbre[feature]), 0.0) self.assertLessEqual(float(timbre[feature]), 1.0) - + # Check instrument classification if 'instruments' in timbre: instruments = timbre['instruments'] self.assertIsInstance(instruments, list) - + for instrument in instruments: self.assertIsInstance(instrument, dict) self.assertIn('instrument', instrument) self.assertIn('confidence', instrument) - + # Check effects analysis if 'effects' in timbre: effects = timbre['effects'] self.assertIsInstance(effects, dict) - + for effect_name, effect_value in effects.items(): self.assertIsInstance(effect_value, (int, float)) self.assertGreaterEqual(effect_value, 0.0) @@ -427,16 +427,13 @@ def test_enhanced_timbre_analysis(self): def test_enhanced_dynamics_analysis(self): """Test enhanced dynamics analysis features.""" - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + if 'dynamics' not in results: self.skipTest("Dynamics analysis not available") - + dynamics = results['dynamics'] - + # Check for enhanced dynamics features if 'climax_points' in dynamics: climax = dynamics['climax_points'] @@ -453,12 +450,12 @@ def test_enhanced_dynamics_analysis(self): self.assertIsInstance(point, dict) self.assertIn('time', point) self.assertIn('intensity', point) - + # Check energy distribution if 'energy_distribution' in dynamics: energy_dist = dynamics['energy_distribution'] self.assertIsInstance(energy_dist, dict) - + # Check energy ratios ratio_fields = ['low_energy_ratio', 'mid_energy_ratio', 'high_energy_ratio'] for field in ratio_fields: @@ -470,41 +467,38 @@ def test_enhanced_dynamics_analysis(self): def test_melody_harmony_integration(self): """Test melody harmony analysis integration.""" - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + if 'melody_harmony' not in results: self.skipTest("Melody harmony analysis not available") - + melody_harmony = results['melody_harmony'] - + # Check for melody features if 'melody' in melody_harmony: melody = melody_harmony['melody'] self.assertIsInstance(melody, dict) - + # Check melody range if 'range' in melody: melody_range = melody['range'] self.assertIsInstance(melody_range, dict) - + # Check for harmony features if 'harmony' in melody_harmony: harmony = melody_harmony['harmony'] self.assertIsInstance(harmony, dict) - + # Check harmony complexity if 'complexity' in harmony: complexity = harmony['complexity'] self.assertIsInstance(complexity, dict) - + # Check combined features if 'combined_features' in melody_harmony: combined = melody_harmony['combined_features'] self.assertIsInstance(combined, dict) - + # Check balance score if 'melody_harmony_balance' in combined: balance = combined['melody_harmony_balance'] @@ -514,28 +508,25 @@ def test_melody_harmony_integration(self): def test_similarity_features_generation(self): """Test similarity features generation.""" - results = self.analyzer.analyze_file( - 
self.temp_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + if 'similarity_features' not in results: self.skipTest("Similarity features not available") - + similarity_features = results['similarity_features'] - + # Check feature vector self.assertIn('feature_vector', similarity_features) feature_vector = similarity_features['feature_vector'] # feature_vector can be either numpy array or list self.assertIsInstance(feature_vector, (np.ndarray, list)) self.assertGreater(len(feature_vector), 0) - + # Check feature weights if 'feature_weights' in similarity_features: weights = similarity_features['feature_weights'] self.assertIsInstance(weights, dict) - + # Check feature metadata if 'feature_metadata' in similarity_features: metadata = similarity_features['feature_metadata'] @@ -545,26 +536,23 @@ def test_comprehensive_error_handling(self): """Test comprehensive error handling.""" # Test with very short audio short_audio = np.sin(2 * np.pi * 440 * np.linspace(0, 0.1, int(self.sr * 0.1))) - + # Create temporary short file short_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) sf.write(short_file.name, short_audio, self.sr) short_file.close() - + try: - results = self.analyzer.analyze_file( - short_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(short_file.name, comprehensive=True) + # Should handle short audio gracefully self.assertIsInstance(results, dict) self.assertIn('basic_info', results) - + except Exception as e: # If exception is raised, it should be handled gracefully self.assertIsInstance(e, Exception) - + finally: # Clean up if os.path.exists(short_file.name): @@ -573,19 +561,19 @@ def test_comprehensive_error_handling(self): def test_progress_callback_with_comprehensive_analysis(self): """Test progress callback with comprehensive analysis.""" progress_updates = [] - + def detailed_progress_callback(progress, message=""): progress_updates.append((progress, message)) - - results = self.analyzer.analyze_file( + + self.analyzer.analyze_file( self.temp_file.name, comprehensive=True, - progress_callback=detailed_progress_callback + progress_callback=detailed_progress_callback, ) - + # Should have received detailed progress updates self.assertGreater(len(progress_updates), 0) - + # Check progress format for progress, message in progress_updates: self.assertIsInstance(progress, (int, float)) @@ -597,48 +585,42 @@ def test_analyzer_with_different_audio_characteristics(self): """Test analyzer with different types of audio.""" # Test with pure sine wave sine_audio = np.sin(2 * np.pi * 440 * np.linspace(0, 5, int(self.sr * 5))) - + sine_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) sf.write(sine_file.name, sine_audio, self.sr) sine_file.close() - + try: - results = self.analyzer.analyze_file( - sine_file.name, - comprehensive=True - ) - + results = self.analyzer.analyze_file(sine_file.name, comprehensive=True) + # Should handle pure sine wave self.assertIsInstance(results, dict) self.assertIn('basic_info', results) - + # Timbre should reflect simple harmonic content if 'timbre' in results: timbre = results['timbre'] # Simple sine wave should have low complexity if 'density' in timbre: self.assertLess(timbre['density'], 0.5) - + finally: if os.path.exists(sine_file.name): os.unlink(sine_file.name) def test_reference_sheet_completeness(self): """Test reference sheet completeness.""" - results = self.analyzer.analyze_file( - self.temp_file.name, - comprehensive=True - 
) - + results = self.analyzer.analyze_file(self.temp_file.name, comprehensive=True) + reference_sheet = self.analyzer.generate_reference_sheet(results) - + # Check for new sections in reference sheet new_sections = [ '## Production Notes', '## Reference Tags', - '## Similarity Features' + '## Similarity Features', ] - + for section in new_sections: # These sections might be present depending on implementation if section in reference_sheet: @@ -649,10 +631,12 @@ def test_reference_sheet_completeness(self): section_content = reference_sheet[section_index:] else: section_content = reference_sheet[section_index:next_section_index] - + # Should have content (allowing for minimal content) - self.assertGreaterEqual(len(section_content.strip()), len(section.strip())) + self.assertGreaterEqual( + len(section_content.strip()), len(section.strip()) + ) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_parallel_analyzer.py b/tests/test_parallel_analyzer.py index 665ff41..9a0f11d 100644 --- a/tests/test_parallel_analyzer.py +++ b/tests/test_parallel_analyzer.py @@ -6,16 +6,20 @@ import shutil import numpy as np import soundfile as sf -from unittest.mock import patch, MagicMock +from unittest.mock import patch from src.bpm_detector.parallel_analyzer import SmartParallelAudioAnalyzer -from src.bpm_detector.auto_parallel import AutoParallelConfig, SystemMonitor, ParallelConfig +from src.bpm_detector.auto_parallel import ( + AutoParallelConfig, + SystemMonitor, + ParallelConfig, +) from src.bpm_detector.progress_manager import ProgressManager, TaskStatus class TestAutoParallelConfig(unittest.TestCase): """Test automatic parallel configuration.""" - + @patch('src.bpm_detector.auto_parallel.cpu_count') @patch('src.bpm_detector.auto_parallel.psutil') def test_high_performance_config(self, mock_psutil, mock_cpu_count): @@ -25,14 +29,14 @@ def test_high_performance_config(self, mock_psutil, mock_cpu_count): mock_psutil.virtual_memory.return_value.available = 16 * 1024**3 # 16GB mock_psutil.cpu_percent.return_value = 25.0 mock_psutil.virtual_memory.return_value.percent = 50.0 - + config = AutoParallelConfig.get_optimal_config() - + self.assertTrue(config.enable_parallel) self.assertEqual(config.max_workers, 10) # 12 - 2 self.assertTrue(config.use_process_pool) self.assertGreater(config.memory_limit_mb, 1000) - + @patch('src.bpm_detector.auto_parallel.cpu_count') @patch('src.bpm_detector.auto_parallel.psutil') def test_low_performance_config(self, mock_psutil, mock_cpu_count): @@ -42,24 +46,22 @@ def test_low_performance_config(self, mock_psutil, mock_cpu_count): mock_psutil.virtual_memory.return_value.available = 4 * 1024**3 # 4GB mock_psutil.cpu_percent.return_value = 80.0 mock_psutil.virtual_memory.return_value.percent = 75.0 - + config = AutoParallelConfig.get_optimal_config() - + self.assertFalse(config.enable_parallel) self.assertEqual(config.max_workers, 1) - + def test_file_count_adjustment(self): """Test configuration adjustment based on file count.""" base_config = ParallelConfig( - enable_parallel=True, - max_workers=8, - use_process_pool=False + enable_parallel=True, max_workers=8, use_process_pool=False ) - + # Single file single_config = AutoParallelConfig.get_file_count_adjustment(1, base_config) self.assertLessEqual(single_config.max_workers, 6) - + # Many files many_config = AutoParallelConfig.get_file_count_adjustment(20, base_config) self.assertTrue(many_config.use_process_pool) @@ -68,36 +70,36 @@ def 
test_file_count_adjustment(self): class TestSystemMonitor(unittest.TestCase): """Test system monitoring functionality.""" - + def test_monitor_initialization(self): """Test monitor initialization.""" monitor = SystemMonitor(check_interval=0.1) self.assertFalse(monitor.monitoring) self.assertEqual(monitor.current_load['cpu'], 0) self.assertEqual(monitor.current_load['memory'], 0) - + @patch('src.bpm_detector.auto_parallel.psutil') def test_load_detection(self, mock_psutil): """Test load detection logic.""" monitor = SystemMonitor() - + # High load scenario monitor.current_load = {'cpu': 95, 'memory': 90} self.assertTrue(monitor.should_reduce_parallelism()) - + # Low load scenario monitor.current_load = {'cpu': 30, 'memory': 40} self.assertFalse(monitor.should_reduce_parallelism()) - + def test_worker_recommendation(self): """Test worker count recommendation.""" monitor = SystemMonitor() - + # High load - should reduce monitor.current_load = {'cpu': 95, 'memory': 90} recommended = monitor.get_recommended_workers(8) self.assertEqual(recommended, 4) # Half of current - + # Low load - should increase monitor.current_load = {'cpu': 30, 'memory': 40} recommended = monitor.get_recommended_workers(4) @@ -106,69 +108,69 @@ def test_worker_recommendation(self): class TestProgressManager(unittest.TestCase): """Test progress management functionality.""" - + def test_task_registration(self): """Test task registration.""" manager = ProgressManager() manager.register_task("test_task", "Test Task") - + tasks = manager.get_task_details() self.assertIn("test_task", tasks) self.assertEqual(tasks["test_task"].name, "Test Task") self.assertEqual(tasks["test_task"].status, TaskStatus.PENDING) - + def test_progress_updates(self): """Test progress updates.""" manager = ProgressManager() manager.register_task("test_task", "Test Task") - + # Update progress manager.update_progress("test_task", 50.0, "Processing...") - + tasks = manager.get_task_details() task = tasks["test_task"] self.assertEqual(task.progress, 50.0) self.assertEqual(task.message, "Processing...") self.assertEqual(task.status, TaskStatus.RUNNING) self.assertIsNotNone(task.start_time) - + def test_task_completion(self): """Test task completion.""" manager = ProgressManager() manager.register_task("test_task", "Test Task") - + # Complete task manager.complete_task("test_task", True) - + tasks = manager.get_task_details() task = tasks["test_task"] self.assertEqual(task.status, TaskStatus.COMPLETED) self.assertEqual(task.progress, 100.0) self.assertIsNotNone(task.end_time) - + def test_overall_progress(self): """Test overall progress calculation.""" manager = ProgressManager() manager.register_task("task1", "Task 1") manager.register_task("task2", "Task 2") - + manager.update_progress("task1", 100.0) manager.update_progress("task2", 50.0) - + overall = manager.get_overall_progress() self.assertEqual(overall, 75.0) # (100 + 50) / 2 - + def test_status_summary(self): """Test status summary.""" manager = ProgressManager() manager.register_task("task1", "Task 1") manager.register_task("task2", "Task 2") manager.register_task("task3", "Task 3") - + manager.complete_task("task1", True) manager.update_progress("task2", 50.0) manager.complete_task("task3", False) - + summary = manager.get_status_summary() self.assertEqual(summary['total'], 3) self.assertEqual(summary['completed'], 1) @@ -178,27 +180,27 @@ def test_status_summary(self): class TestSmartParallelAudioAnalyzer(unittest.TestCase): """Test smart parallel audio analyzer.""" - + def setUp(self): """Set 
up test fixtures.""" # Create a temporary audio file for testing self.temp_dir = tempfile.mkdtemp() self.test_file = os.path.join(self.temp_dir, "test.wav") - + # Generate test audio (1 second of sine wave) sr = 22050 duration = 1.0 t = np.linspace(0, duration, int(sr * duration)) audio = np.sin(2 * np.pi * 440 * t) # 440 Hz sine wave - + sf.write(self.test_file, audio, sr) - + def tearDown(self): """Clean up test fixtures.""" if os.path.exists(self.test_file): os.remove(self.test_file) os.rmdir(self.temp_dir) - + @patch('src.bpm_detector.auto_parallel.cpu_count') @patch('src.bpm_detector.auto_parallel.psutil') def test_analyzer_initialization(self, mock_psutil, mock_cpu_count): @@ -208,42 +210,46 @@ def test_analyzer_initialization(self, mock_psutil, mock_cpu_count): mock_psutil.virtual_memory.return_value.available = 8 * 1024**3 mock_psutil.cpu_percent.return_value = 30.0 mock_psutil.virtual_memory.return_value.percent = 50.0 - + analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) - + self.assertIsNotNone(analyzer._parallel_config) self.assertTrue(analyzer._parallel_config.enable_parallel) - + @patch('src.bpm_detector.auto_parallel.cpu_count') @patch('src.bpm_detector.auto_parallel.psutil') def test_parallel_disabled(self, mock_psutil, mock_cpu_count): """Test analyzer with parallel processing disabled.""" analyzer = SmartParallelAudioAnalyzer(auto_parallel=False) - + self.assertIsNone(analyzer._parallel_config) - + def test_manual_worker_override(self): """Test manual worker count override.""" analyzer = SmartParallelAudioAnalyzer(auto_parallel=True, max_workers=4) - + if analyzer._parallel_config: self.assertEqual(analyzer._parallel_config.max_workers, 4) - - @patch('src.bpm_detector.parallel_analyzer.SmartParallelAudioAnalyzer._should_use_parallel') + + @patch( + 'src.bpm_detector.parallel_analyzer.SmartParallelAudioAnalyzer._should_use_parallel' + ) def test_fallback_to_sequential(self, mock_should_use_parallel): """Test fallback to sequential processing.""" mock_should_use_parallel.return_value = False - + analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) - + # Should fall back to parent class method - with patch.object(analyzer.__class__.__bases__[0], 'analyze_file') as mock_parent: + with patch.object( + analyzer.__class__.__bases__[0], 'analyze_file' + ) as mock_parent: mock_parent.return_value = {"test": "result"} - - result = analyzer.analyze_file(self.test_file, comprehensive=True) - + + analyzer.analyze_file(self.test_file, comprehensive=True) + mock_parent.assert_called_once() - + def test_multiple_file_processing(self): """Test multiple file processing.""" # Create additional test files @@ -256,15 +262,17 @@ def test_multiple_file_processing(self): audio = np.sin(2 * np.pi * (440 + i * 100) * t) sf.write(file_path, audio, sr) test_files.append(file_path) - - analyzer = SmartParallelAudioAnalyzer(auto_parallel=False) # Disable for testing - + + analyzer = SmartParallelAudioAnalyzer( + auto_parallel=False + ) # Disable for testing + # Test multiple file analysis results = analyzer.analyze_file(test_files, comprehensive=False) - + self.assertIsInstance(results, dict) self.assertEqual(len(results), 3) - + # Clean up for file_path in test_files: os.remove(file_path) @@ -277,14 +285,14 @@ def setUp(self): """Set up test fixtures.""" self.temp_dir = tempfile.mkdtemp() self.sr = 22050 - + # Create test audio file duration = 2.0 t = np.linspace(0, duration, int(self.sr * duration)) audio = np.sin(2 * np.pi * 440 * t) self.test_file = os.path.join(self.temp_dir, 
"test.wav") sf.write(self.test_file, audio, self.sr) - + self.analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) def tearDown(self): @@ -296,20 +304,20 @@ def tearDown(self): def test_progress_manager_integration(self): """Test integration with progress manager.""" progress_updates = [] - + def progress_callback(progress, message): progress_updates.append((progress, message)) - - results = self.analyzer.analyze_file( + + self.analyzer.analyze_file( self.test_file, comprehensive=True, progress_callback=progress_callback, - detailed_progress=True + detailed_progress=True, ) - + # Should have received progress updates self.assertGreater(len(progress_updates), 0) - + # Progress should be between 0 and 100 for progress, message in progress_updates: self.assertGreaterEqual(progress, 0.0) @@ -319,43 +327,43 @@ def progress_callback(progress, message): def test_system_resource_monitoring(self): """Test system resource monitoring during analysis.""" from src.bpm_detector.auto_parallel import SystemMonitor - + # Enable system monitoring monitor = SystemMonitor() monitor.start_monitoring() - + try: results = self.analyzer.analyze_file(self.test_file, comprehensive=True) - + # Should complete successfully even with monitoring self.assertIsInstance(results, dict) self.assertIn('basic_info', results) - + # Check monitoring functionality should_reduce = monitor.should_reduce_parallelism() self.assertIsInstance(should_reduce, bool) - + finally: monitor.stop_monitoring() def test_performance_profiling(self): """Test performance profiling functionality.""" from src.bpm_detector.auto_parallel import PerformanceProfiler - + profiler = PerformanceProfiler() - + # Start profiling profile_data = profiler.start_profiling("test_analysis") - + # Run analysis results = self.analyzer.analyze_file(self.test_file, comprehensive=True) - + # End profiling profiler.end_profiling(profile_data) - + # Should complete successfully with profiling self.assertIsInstance(results, dict) - + # Check profiling data summary = profiler.get_performance_summary() self.assertIsInstance(summary, dict) @@ -363,10 +371,9 @@ def test_performance_profiling(self): def test_adaptive_worker_adjustment(self): """Test adaptive worker count adjustment.""" from src.bpm_detector.auto_parallel import AutoParallelConfig - + # Test with different file counts - small_batch = [self.test_file] - + # Create multiple test files large_batch = [] for i in range(5): @@ -376,17 +383,17 @@ def test_adaptive_worker_adjustment(self): audio = np.sin(2 * np.pi * (440 + i * 100) * t) sf.write(file_path, audio, self.sr) large_batch.append(file_path) - + try: # Test file count adjustment base_config = AutoParallelConfig.get_optimal_config() adjusted_config = AutoParallelConfig.get_file_count_adjustment( len(large_batch), base_config ) - + self.assertIsInstance(adjusted_config.max_workers, int) self.assertGreater(adjusted_config.max_workers, 0) - + finally: # Clean up batch files for file_path in large_batch: @@ -399,26 +406,26 @@ def test_error_handling_in_parallel(self): corrupted_file = os.path.join(self.temp_dir, "corrupted.wav") with open(corrupted_file, 'w') as f: f.write("This is not a valid audio file") - + test_files = [self.test_file, corrupted_file] - + try: results = self.analyzer.analyze_file(test_files, comprehensive=False) - + # Should handle errors gracefully self.assertIsInstance(results, dict) - + # Good file should have results self.assertIn(self.test_file, results) - + # Results should be valid for good file if self.test_file in results: 
self.assertIsInstance(results[self.test_file], dict) - + except Exception as e: # If exception is raised, it should be handled gracefully self.assertIsInstance(e, Exception) - + finally: if os.path.exists(corrupted_file): os.remove(corrupted_file) @@ -426,18 +433,15 @@ def test_error_handling_in_parallel(self): def test_memory_management(self): """Test memory management during parallel processing.""" # Test with limited workers to check memory management - limited_analyzer = SmartParallelAudioAnalyzer( - auto_parallel=True, - max_workers=2 - ) - + limited_analyzer = SmartParallelAudioAnalyzer(auto_parallel=True, max_workers=2) + try: results = limited_analyzer.analyze_file(self.test_file, comprehensive=True) - + # Should complete without memory issues self.assertIsInstance(results, dict) self.assertIn('basic_info', results) - + finally: if hasattr(limited_analyzer, 'cleanup'): limited_analyzer.cleanup() @@ -445,10 +449,10 @@ def test_memory_management(self): def test_parallel_strategy_selection(self): """Test automatic selection of parallel strategy.""" from src.bpm_detector.auto_parallel import AutoParallelConfig, ParallelStrategy - + # Get optimal configuration config = AutoParallelConfig.get_optimal_config() - + # Should have a valid strategy self.assertIsInstance(config.strategy, ParallelStrategy) # Should have a valid strategy (including AGGRESSIVE_PARALLEL) @@ -458,10 +462,10 @@ def test_parallel_strategy_selection(self): ParallelStrategy.AGGRESSIVE_PARALLEL, ParallelStrategy.BALANCED_PARALLEL, ParallelStrategy.CONSERVATIVE_PARALLEL, - ParallelStrategy.SEQUENTIAL_ONLY + ParallelStrategy.SEQUENTIAL_ONLY, ] self.assertIn(config.strategy, valid_strategies) - + # Should have reasonable worker count self.assertGreater(config.max_workers, 0) self.assertLessEqual(config.max_workers, 32) # Reasonable upper bound @@ -469,14 +473,14 @@ def test_parallel_strategy_selection(self): def test_cleanup_functionality(self): """Test proper cleanup of parallel resources.""" analyzer = SmartParallelAudioAnalyzer(auto_parallel=True) - + # Run some analysis results = analyzer.analyze_file(self.test_file, comprehensive=False) self.assertIsInstance(results, dict) - + # Test explicit cleanup analyzer.cleanup() - + # Should not raise errors after cleanup try: analyzer.cleanup() # Second cleanup should be safe @@ -485,26 +489,29 @@ def test_cleanup_functionality(self): def test_detailed_progress_tracking(self): """Test detailed progress tracking functionality.""" - from src.bpm_detector.progress_manager import ProgressManager, DetailedProgressDisplay - + from src.bpm_detector.progress_manager import ( + ProgressManager, + DetailedProgressDisplay, + ) + progress_manager = ProgressManager() detailed_display = DetailedProgressDisplay() - + # Register some tasks progress_manager.register_task("task1", "Test Task 1") progress_manager.register_task("task2", "Test Task 2") - + # Update progress progress_manager.update_progress("task1", 50.0, "Halfway done") progress_manager.update_progress("task2", 25.0, "Quarter done") - + # Test display update detailed_display.update(progress_manager) - + # Complete tasks progress_manager.complete_task("task1", success=True) progress_manager.complete_task("task2", success=True) - + # Get final status status = progress_manager.get_status_summary() self.assertIsInstance(status, dict) @@ -513,4 +520,4 @@ def test_detailed_progress_tracking(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_structure_analyzer.py 
b/tests/test_structure_analyzer.py index cc544da..457a777 100644 --- a/tests/test_structure_analyzer.py +++ b/tests/test_structure_analyzer.py @@ -2,127 +2,140 @@ import unittest import numpy as np -import librosa from src.bpm_detector.structure_analyzer import StructureAnalyzer class TestStructureAnalyzer(unittest.TestCase): """Test cases for StructureAnalyzer.""" - + def setUp(self): """Set up test fixtures.""" self.analyzer = StructureAnalyzer() self.sr = 22050 self.duration = 20.0 # 20 seconds - + # Create a test signal with different sections t = np.linspace(0, self.duration, int(self.sr * self.duration)) - + # Create signal with changing characteristics signal = np.zeros_like(t) - + # Section 1: Low energy (intro) mask1 = t < 5 signal[mask1] = 0.2 * np.sin(2 * np.pi * 440 * t[mask1]) - + # Section 2: Medium energy (verse) mask2 = (t >= 5) & (t < 10) signal[mask2] = 0.5 * ( - np.sin(2 * np.pi * 440 * t[mask2]) + - 0.5 * np.sin(2 * np.pi * 880 * t[mask2]) + np.sin(2 * np.pi * 440 * t[mask2]) + + 0.5 * np.sin(2 * np.pi * 880 * t[mask2]) ) - + # Section 3: High energy (chorus) mask3 = (t >= 10) & (t < 15) signal[mask3] = 0.8 * ( - np.sin(2 * np.pi * 440 * t[mask3]) + - np.sin(2 * np.pi * 880 * t[mask3]) + - 0.5 * np.sin(2 * np.pi * 1320 * t[mask3]) + np.sin(2 * np.pi * 440 * t[mask3]) + + np.sin(2 * np.pi * 880 * t[mask3]) + + 0.5 * np.sin(2 * np.pi * 1320 * t[mask3]) ) - + # Section 4: Medium energy (verse repeat) mask4 = t >= 15 signal[mask4] = 0.5 * ( - np.sin(2 * np.pi * 440 * t[mask4]) + - 0.5 * np.sin(2 * np.pi * 880 * t[mask4]) + np.sin(2 * np.pi * 440 * t[mask4]) + + 0.5 * np.sin(2 * np.pi * 880 * t[mask4]) ) - + # Add some noise signal += 0.05 * np.random.randn(len(t)) - + self.test_signal = signal - + def test_extract_structural_features(self): """Test enhanced structural feature extraction.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) - + # Check that all expected features are present (including new ones) expected_features = [ - 'mfcc', 'chroma', 'spectral_centroid', 'rms', - 'zcr', 'onset_strength', 'spectral_contrast', 'spectral_rolloff' + 'mfcc', + 'chroma', + 'spectral_centroid', + 'rms', + 'zcr', + 'onset_strength', + 'spectral_contrast', + 'spectral_rolloff', ] - + for feature_name in expected_features: self.assertIn(feature_name, features) self.assertIsInstance(features[feature_name], np.ndarray) self.assertGreater(features[feature_name].size, 0) - + def test_compute_self_similarity_matrix(self): """Test self-similarity matrix computation.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) similarity_matrix = self.analyzer.compute_self_similarity_matrix(features) - + # Check shape (should be square) self.assertEqual(similarity_matrix.shape[0], similarity_matrix.shape[1]) - + # Check diagonal is 1 (self-similarity) diagonal = np.diag(similarity_matrix) np.testing.assert_allclose(diagonal, 1.0, rtol=1e-10) - + # Check symmetry np.testing.assert_allclose(similarity_matrix, similarity_matrix.T, rtol=1e-10) - + # Check values are in [0, 1] range self.assertTrue(np.all(similarity_matrix >= 0)) self.assertTrue(np.all(similarity_matrix <= 1)) - + def test_detect_boundaries(self): """Test boundary detection.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) similarity_matrix = self.analyzer.compute_self_similarity_matrix(features) boundaries = self.analyzer.detect_boundaries(similarity_matrix, self.sr) - + # Should detect at least start and end 
self.assertGreaterEqual(len(boundaries), 2) - + # First boundary should be 0 self.assertEqual(boundaries[0], 0) - + # Last boundary should be within reasonable range self.assertLessEqual(boundaries[-1], similarity_matrix.shape[0] - 1) self.assertGreater(boundaries[-1], similarity_matrix.shape[0] * 0.5) - + # Boundaries should be sorted self.assertEqual(boundaries, sorted(boundaries)) - + def test_classify_sections(self): """Test enhanced section classification.""" boundaries = [0, 100, 200, 300, 400] # Mock boundaries - sections = self.analyzer.classify_sections(self.test_signal, self.sr, boundaries) - + sections = self.analyzer.classify_sections( + self.test_signal, self.sr, boundaries + ) + # Should have one less section than boundaries self.assertEqual(len(sections), len(boundaries) - 1) - + # Check enhanced section structure for section in sections: required_keys = [ - 'type', 'start_time', 'end_time', 'duration', - 'characteristics', 'energy_level', 'complexity', - 'relative_energy', 'rhythm_density' + 'type', + 'start_time', + 'end_time', + 'duration', + 'characteristics', + 'energy_level', + 'complexity', + 'relative_energy', + 'rhythm_density', ] for key in required_keys: self.assertIn(key, section) - + # Check types self.assertIsInstance(section['type'], str) self.assertIsInstance(section['start_time'], float) @@ -133,19 +146,19 @@ def test_classify_sections(self): self.assertIsInstance(section['complexity'], float) self.assertIsInstance(section['relative_energy'], float) self.assertIsInstance(section['rhythm_density'], float) - + # Check time consistency self.assertLessEqual(section['start_time'], section['end_time']) self.assertAlmostEqual( section['duration'], section['end_time'] - section['start_time'], - places=2 + places=2, ) - + # Check J-Pop ASCII labels self.assertIn('ascii_label', section) self.assertIsInstance(section['ascii_label'], str) - + def test_analyze_form(self): """Test form analysis.""" # Mock sections @@ -155,19 +168,23 @@ def test_analyze_form(self): {'type': 'chorus', 'duration': 16}, {'type': 'verse', 'duration': 16}, {'type': 'chorus', 'duration': 16}, - {'type': 'outro', 'duration': 8} + {'type': 'outro', 'duration': 8}, ] - + form_analysis = self.analyzer.analyze_form(mock_sections) - + # Check required keys required_keys = [ - 'form', 'repetition_ratio', 'structural_complexity', - 'section_count', 'unique_sections', 'section_types' + 'form', + 'repetition_ratio', + 'structural_complexity', + 'section_count', + 'unique_sections', + 'section_types', ] for key in required_keys: self.assertIn(key, form_analysis) - + # Check types self.assertIsInstance(form_analysis['form'], str) self.assertIsInstance(form_analysis['repetition_ratio'], float) @@ -175,50 +192,58 @@ def test_analyze_form(self): self.assertIsInstance(form_analysis['section_count'], int) self.assertIsInstance(form_analysis['unique_sections'], int) self.assertIsInstance(form_analysis['section_types'], list) - + # Check values self.assertEqual(form_analysis['section_count'], len(mock_sections)) self.assertGreater(form_analysis['unique_sections'], 0) self.assertLessEqual(form_analysis['unique_sections'], len(mock_sections)) - + def test_detect_repetitions(self): """Test repetition detection.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) similarity_matrix = self.analyzer.compute_self_similarity_matrix(features) repetitions = self.analyzer.detect_repetitions(similarity_matrix, self.sr) - + # Should return a list self.assertIsInstance(repetitions, list) - + # 
Check repetition structure for rep in repetitions: required_keys = [ - 'first_occurrence', 'second_occurrence', - 'duration', 'similarity' + 'first_occurrence', + 'second_occurrence', + 'duration', + 'similarity', ] for key in required_keys: self.assertIn(key, rep) self.assertIsInstance(rep[key], float) - + # Check logical constraints self.assertGreater(rep['duration'], 0) self.assertGreaterEqual(rep['similarity'], 0) self.assertLessEqual(rep['similarity'], 1) self.assertLess(rep['first_occurrence'], rep['second_occurrence']) - + def test_analyze_complete(self): """Test complete structural analysis.""" result = self.analyzer.analyze(self.test_signal, self.sr) - + # Check all required keys are present required_keys = [ - 'sections', 'form', 'repetition_ratio', 'structural_complexity', - 'section_count', 'unique_sections', 'repetitions', 'boundaries' + 'sections', + 'form', + 'repetition_ratio', + 'structural_complexity', + 'section_count', + 'unique_sections', + 'repetitions', + 'boundaries', ] - + for key in required_keys: self.assertIn(key, result) - + # Check types self.assertIsInstance(result['sections'], list) self.assertIsInstance(result['form'], str) @@ -228,12 +253,12 @@ def test_analyze_complete(self): self.assertIsInstance(result['unique_sections'], int) self.assertIsInstance(result['repetitions'], list) self.assertIsInstance(result['boundaries'], list) - + def test_section_to_letter(self): """Test section type to letter conversion.""" if not hasattr(self.analyzer, '_section_to_letter'): self.skipTest("_section_to_letter method not implemented") - + test_cases = { 'intro': 'I', 'verse': 'A', @@ -241,17 +266,17 @@ def test_section_to_letter(self): 'bridge': 'C', 'instrumental': 'D', 'outro': 'O', - 'unknown': 'X' + 'unknown': 'X', } - + for section_type, expected_letter in test_cases.items(): result = self.analyzer._section_to_letter(section_type) self.assertEqual(result, expected_letter) - + def test_empty_input(self): """Test behavior with empty input.""" empty_signal = np.array([]) - + # Should handle empty input gracefully try: result = self.analyzer.analyze(empty_signal, self.sr) @@ -259,77 +284,79 @@ def test_empty_input(self): except Exception: # Or raise appropriate exception pass - + def test_vocal_detection(self): """Test vocal presence detection.""" if not hasattr(self.analyzer, '_detect_vocal_presence'): self.skipTest("_detect_vocal_presence method not implemented") - + # Create signal with vocal-like frequencies t = np.linspace(0, 2.0, int(self.sr * 2.0)) vocal_signal = np.sin(2 * np.pi * 200 * t) # 200 Hz (vocal range) - + has_vocal = self.analyzer._detect_vocal_presence(vocal_signal, self.sr) self.assertIsInstance(has_vocal, bool) - + def test_enhanced_boundary_detection(self): """Test enhanced beat-synchronized boundary detection.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) similarity_matrix = self.analyzer.compute_self_similarity_matrix(features) - + # Test with different BPM values bpm_values = [120, 130, 140] for bpm in bpm_values: - boundaries = self.analyzer.detect_boundaries(similarity_matrix, self.sr, bpm=bpm) - + boundaries = self.analyzer.detect_boundaries( + similarity_matrix, self.sr, bpm=bpm + ) + # Should detect reasonable number of boundaries self.assertGreaterEqual(len(boundaries), 2) self.assertLessEqual(len(boundaries), 10) # Not too many - + # Boundaries should be sorted self.assertEqual(boundaries, sorted(boundaries)) - + def test_rbf_similarity_matrix(self): """Test RBF kernel similarity matrix 
computation.""" features = self.analyzer.extract_structural_features(self.test_signal, self.sr) similarity_matrix = self.analyzer.compute_self_similarity_matrix(features) - + # RBF kernel should produce valid similarity matrix self.assertEqual(similarity_matrix.shape[0], similarity_matrix.shape[1]) self.assertTrue(np.all(similarity_matrix >= 0)) self.assertTrue(np.all(similarity_matrix <= 1)) - + # Diagonal should be close to 1 (self-similarity) diagonal = np.diag(similarity_matrix) self.assertTrue(np.all(diagonal > 0.9)) - + def test_relative_energy_analysis(self): """Test relative energy analysis for A/B/S classification.""" # Create signal with clear energy progression (A-melo < B-melo < Sabi) t = np.linspace(0, 30.0, int(self.sr * 30.0)) - + # A-melo: low energy - a_melo = 0.3 * np.sin(2 * np.pi * 440 * t[:int(len(t)/3)]) - + a_melo = 0.3 * np.sin(2 * np.pi * 440 * t[: int(len(t) / 3)]) + # B-melo: medium energy b_melo = 0.6 * ( - np.sin(2 * np.pi * 440 * t[int(len(t)/3):int(2*len(t)/3)]) + - 0.5 * np.sin(2 * np.pi * 880 * t[int(len(t)/3):int(2*len(t)/3)]) + np.sin(2 * np.pi * 440 * t[int(len(t) / 3) : int(2 * len(t) / 3)]) + + 0.5 * np.sin(2 * np.pi * 880 * t[int(len(t) / 3) : int(2 * len(t) / 3)]) ) - + # Sabi: high energy sabi = 0.9 * ( - np.sin(2 * np.pi * 440 * t[int(2*len(t)/3):]) + - np.sin(2 * np.pi * 880 * t[int(2*len(t)/3):]) + - 0.5 * np.sin(2 * np.pi * 1320 * t[int(2*len(t)/3):]) + np.sin(2 * np.pi * 440 * t[int(2 * len(t) / 3) :]) + + np.sin(2 * np.pi * 880 * t[int(2 * len(t) / 3) :]) + + 0.5 * np.sin(2 * np.pi * 1320 * t[int(2 * len(t) / 3) :]) ) - + test_signal = np.concatenate([a_melo, b_melo, sabi]) - + # Analyze with enhanced features result = self.analyzer.analyze(test_signal, self.sr, bpm=130) sections = result['sections'] - + # Should detect energy progression if len(sections) >= 3: energies = [s['relative_energy'] for s in sections[:3]] @@ -337,45 +364,41 @@ def test_relative_energy_analysis(self): self.assertIsInstance(energies[0], float) self.assertIsInstance(energies[1], float) self.assertIsInstance(energies[2], float) - + def test_fade_ending_detection(self): """Test fade ending detection for outro identification.""" # Create signal with fade ending t = np.linspace(0, 20.0, int(self.sr * 20.0)) - + # Normal section - normal_part = 0.7 * np.sin(2 * np.pi * 440 * t[:int(len(t)*0.8)]) - + normal_part = 0.7 * np.sin(2 * np.pi * 440 * t[: int(len(t) * 0.8)]) + # Fade ending - fade_part = t[int(len(t)*0.8):] + fade_part = t[int(len(t) * 0.8) :] fade_envelope = np.linspace(0.7, 0.1, len(fade_part)) fade_signal = fade_envelope * np.sin(2 * np.pi * 440 * fade_part) - + test_signal = np.concatenate([normal_part, fade_signal]) - + # Test fade detection if hasattr(self.analyzer.section_processor, '_detect_fade_ending'): - mock_section = { - 'start_time': 16.0, - 'end_time': 20.0, - 'duration': 4.0 - } - + mock_section = {'start_time': 16.0, 'end_time': 20.0, 'duration': 4.0} + is_fade = self.analyzer.section_processor._detect_fade_ending( mock_section, test_signal, self.sr ) self.assertIsInstance(is_fade, bool) - + def test_melody_jump_rate(self): """Test melody jump rate calculation.""" if hasattr(self.analyzer.section_classifier, '_calculate_melody_jump_rate'): # Create signal with melodic jumps t = np.linspace(0, 5.0, int(self.sr * 5.0)) - + # Create melody with large jumps frequencies = [440, 880, 440, 1320, 440] # Large jumps segment_length = len(t) // len(frequencies) - + melody_signal = np.zeros_like(t) for i, freq in enumerate(frequencies): start_idx = i * 
segment_length @@ -383,26 +406,21 @@ def test_melody_jump_rate(self): melody_signal[start_idx:end_idx] = np.sin( 2 * np.pi * freq * t[start_idx:end_idx] ) - + jump_rate = self.analyzer.section_classifier._calculate_melody_jump_rate( melody_signal, self.sr ) - + self.assertIsInstance(jump_rate, float) self.assertGreaterEqual(jump_rate, 0.0) self.assertLessEqual(jump_rate, 1.0) - + def test_jpop_section_labels(self): """Test J-Pop specific section labeling.""" result = self.analyzer.analyze(self.test_signal, self.sr, bpm=130) sections = result['sections'] - + # Check that J-Pop ASCII labels are present - valid_labels = [ - 'Intro', 'A-melo', 'B-melo', 'Sabi', 'C-melo', - 'Kansou', 'Break', 'Interlude', 'Solo', 'Serifu', 'Outro' - ] - for section in sections: self.assertIn('ascii_label', section) # ASCII label should be one of the valid J-Pop terms @@ -413,4 +431,4 @@ def test_jpop_section_labels(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_timbre_analyzer.py b/tests/test_timbre_analyzer.py index 93cafc6..f1f7ce5 100644 --- a/tests/test_timbre_analyzer.py +++ b/tests/test_timbre_analyzer.py @@ -2,7 +2,6 @@ import unittest import numpy as np -from unittest.mock import patch, MagicMock from src.bpm_detector.timbre_analyzer import TimbreAnalyzer @@ -13,31 +12,30 @@ def setUp(self): """Set up test fixtures.""" self.analyzer = TimbreAnalyzer(hop_length=512, n_fft=2048) self.sr = 22050 - + # Create test audio with different timbral characteristics duration = 5 # seconds t = np.linspace(0, duration, int(self.sr * duration)) - + # Bright sound (high frequencies) self.bright_audio = ( - 0.3 * np.sin(2 * np.pi * 440 * t) + - 0.4 * np.sin(2 * np.pi * 880 * t) + - 0.3 * np.sin(2 * np.pi * 1760 * t) + 0.3 * np.sin(2 * np.pi * 440 * t) + + 0.4 * np.sin(2 * np.pi * 880 * t) + + 0.3 * np.sin(2 * np.pi * 1760 * t) ) - + # Warm sound (low frequencies) self.warm_audio = ( - 0.5 * np.sin(2 * np.pi * 220 * t) + - 0.3 * np.sin(2 * np.pi * 330 * t) + - 0.2 * np.sin(2 * np.pi * 440 * t) + 0.5 * np.sin(2 * np.pi * 220 * t) + + 0.3 * np.sin(2 * np.pi * 330 * t) + + 0.2 * np.sin(2 * np.pi * 440 * t) ) - + # Rough sound (with noise) - self.rough_audio = ( - 0.5 * np.sin(2 * np.pi * 440 * t) + - 0.3 * np.random.randn(len(t)) + self.rough_audio = 0.5 * np.sin(2 * np.pi * 440 * t) + 0.3 * np.random.randn( + len(t) ) - + # Dense sound (many harmonics) harmonics = [440 * (i + 1) for i in range(8)] self.dense_audio = sum( @@ -48,59 +46,79 @@ def setUp(self): def test_extract_timbral_features(self): """Test timbral feature extraction.""" features = self.analyzer.extract_timbral_features(self.bright_audio, self.sr) - + # Check required features - expected_features = ['spectral_centroid', 'spectral_contrast', 'mfcc', - 'spectral_rolloff', 'zero_crossing_rate', 'chroma'] + expected_features = [ + 'spectral_centroid', + 'spectral_contrast', + 'mfcc', + 'spectral_rolloff', + 'zero_crossing_rate', + 'chroma', + ] for feature in expected_features: self.assertIn(feature, features) self.assertIsInstance(features[feature], np.ndarray) self.assertGreater(features[feature].shape[0], 0) - + # MFCC should have multiple coefficients self.assertGreater(features['mfcc'].shape[0], 1) def test_analyze_brightness(self): """Test brightness analysis.""" # Extract features for bright audio - bright_features = self.analyzer.extract_timbral_features(self.bright_audio, self.sr) - bright_score = self.analyzer.analyze_brightness(bright_features['spectral_centroid'], self.sr) - + 
bright_features = self.analyzer.extract_timbral_features( + self.bright_audio, self.sr + ) + bright_score = self.analyzer.analyze_brightness( + bright_features['spectral_centroid'], self.sr + ) + # Extract features for warm audio warm_features = self.analyzer.extract_timbral_features(self.warm_audio, self.sr) - warm_score = self.analyzer.analyze_brightness(warm_features['spectral_centroid'], self.sr) - + warm_score = self.analyzer.analyze_brightness( + warm_features['spectral_centroid'], self.sr + ) + # Brightness should be a float between 0 and 1 self.assertIsInstance(bright_score, (int, float)) self.assertGreaterEqual(bright_score, 0.0) self.assertLessEqual(bright_score, 1.0) - + self.assertIsInstance(warm_score, (int, float)) self.assertGreaterEqual(warm_score, 0.0) self.assertLessEqual(warm_score, 1.0) - + # Bright audio should have higher brightness score self.assertGreater(bright_score, warm_score) def test_analyze_roughness(self): """Test roughness analysis.""" # Extract features for rough audio - rough_features = self.analyzer.extract_timbral_features(self.rough_audio, self.sr) - rough_score = self.analyzer.analyze_roughness(rough_features['spectral_contrast']) - + rough_features = self.analyzer.extract_timbral_features( + self.rough_audio, self.sr + ) + rough_score = self.analyzer.analyze_roughness( + rough_features['spectral_contrast'] + ) + # Extract features for smooth audio - smooth_features = self.analyzer.extract_timbral_features(self.bright_audio, self.sr) - smooth_score = self.analyzer.analyze_roughness(smooth_features['spectral_contrast']) - + smooth_features = self.analyzer.extract_timbral_features( + self.bright_audio, self.sr + ) + smooth_score = self.analyzer.analyze_roughness( + smooth_features['spectral_contrast'] + ) + # Roughness should be a float between 0 and 1 self.assertIsInstance(rough_score, (int, float)) self.assertGreaterEqual(rough_score, 0.0) self.assertLessEqual(rough_score, 1.0) - + self.assertIsInstance(smooth_score, (int, float)) self.assertGreaterEqual(smooth_score, 0.0) self.assertLessEqual(smooth_score, 1.0) - + # Rough audio should have higher roughness score self.assertGreater(rough_score, smooth_score) @@ -109,64 +127,70 @@ def test_analyze_warmth(self): # Extract features for warm audio warm_features = self.analyzer.extract_timbral_features(self.warm_audio, self.sr) warm_score = self.analyzer.analyze_warmth(warm_features['mfcc']) - + # Extract features for bright audio - bright_features = self.analyzer.extract_timbral_features(self.bright_audio, self.sr) + bright_features = self.analyzer.extract_timbral_features( + self.bright_audio, self.sr + ) bright_score = self.analyzer.analyze_warmth(bright_features['mfcc']) - + # Warmth should be a float between 0 and 1 self.assertIsInstance(warm_score, (int, float)) self.assertGreaterEqual(warm_score, 0.0) self.assertLessEqual(warm_score, 1.0) - + self.assertIsInstance(bright_score, (int, float)) self.assertGreaterEqual(bright_score, 0.0) self.assertLessEqual(bright_score, 1.0) - + # Warm audio should have higher warmth score self.assertGreater(warm_score, bright_score) def test_analyze_density(self): """Test density analysis.""" # Extract features for dense audio - dense_features = self.analyzer.extract_timbral_features(self.dense_audio, self.sr) + dense_features = self.analyzer.extract_timbral_features( + self.dense_audio, self.sr + ) dense_score = self.analyzer.analyze_density(dense_features) - + # Extract features for simple audio - simple_features = 
self.analyzer.extract_timbral_features(self.bright_audio, self.sr) + simple_features = self.analyzer.extract_timbral_features( + self.bright_audio, self.sr + ) simple_score = self.analyzer.analyze_density(simple_features) - + # Density should be a float between 0 and 1 self.assertIsInstance(dense_score, (int, float)) self.assertGreaterEqual(dense_score, 0.0) self.assertLessEqual(dense_score, 1.0) - + self.assertIsInstance(simple_score, (int, float)) self.assertGreaterEqual(simple_score, 0.0) self.assertLessEqual(simple_score, 1.0) - + # Dense audio should have higher density score self.assertGreater(dense_score, simple_score) def test_classify_instruments(self): """Test instrument classification.""" instruments = self.analyzer.classify_instruments(self.bright_audio, self.sr) - + # Should return a list of instrument detections self.assertIsInstance(instruments, list) - + # Each detection should have required fields for instrument in instruments: self.assertIsInstance(instrument, dict) required_fields = ['instrument', 'confidence', 'prominence'] for field in required_fields: self.assertIn(field, instrument) - + # Check field types and ranges self.assertIsInstance(instrument['instrument'], str) self.assertIsInstance(instrument['confidence'], (int, float)) self.assertIsInstance(instrument['prominence'], (int, float)) - + self.assertGreaterEqual(instrument['confidence'], 0.0) self.assertLessEqual(instrument['confidence'], 1.0) self.assertGreaterEqual(instrument['prominence'], 0.0) @@ -179,15 +203,15 @@ def test_filter_redundant_instruments(self): {'instrument': 'piano', 'confidence': 0.9, 'prominence': 0.8}, {'instrument': 'piano', 'confidence': 0.7, 'prominence': 0.6}, # Duplicate {'instrument': 'guitar', 'confidence': 0.8, 'prominence': 0.7}, - {'instrument': 'drums', 'confidence': 0.6, 'prominence': 0.5} + {'instrument': 'drums', 'confidence': 0.6, 'prominence': 0.5}, ] - + filtered = self.analyzer._filter_redundant_instruments(test_instruments) - + # Should remove duplicate piano entry self.assertIsInstance(filtered, list) self.assertLessEqual(len(filtered), len(test_instruments)) - + # Should keep the higher confidence piano piano_entries = [inst for inst in filtered if inst['instrument'] == 'piano'] if len(piano_entries) > 0: @@ -197,14 +221,14 @@ def test_filter_redundant_instruments(self): def test_calculate_instrument_confidence(self): """Test instrument confidence calculation.""" # Create test magnitude spectrum - freqs = np.linspace(0, self.sr/2, 1024) + freqs = np.linspace(0, self.sr / 2, 1024) magnitude = np.exp(-freqs / 1000) # Decaying spectrum - + # Test piano frequency range confidence = self.analyzer._calculate_instrument_confidence( magnitude, freqs, freq_range=(200, 2000), spectral_shape='harmonic' ) - + self.assertIsInstance(confidence, (int, float)) self.assertGreaterEqual(confidence, 0.0) self.assertLessEqual(confidence, 1.0) @@ -212,25 +236,25 @@ def test_calculate_instrument_confidence(self): def test_calculate_instrument_prominence(self): """Test instrument prominence calculation.""" # Create test magnitude spectrum - freqs = np.linspace(0, self.sr/2, 1024) + freqs = np.linspace(0, self.sr / 2, 1024) magnitude = np.ones_like(freqs) magnitude[100:200] = 5.0 # Peak in specific range - + prominence = self.analyzer._calculate_instrument_prominence( magnitude, freqs, freq_range=(freqs[100], freqs[200]) ) - + self.assertIsInstance(prominence, (int, float)) self.assertGreaterEqual(prominence, 0.0) self.assertLessEqual(prominence, 1.0) - + # Should be high due to the peak in 
the frequency range self.assertGreater(prominence, 0.5) def test_analyze_effects_usage(self): """Test effects usage analysis.""" effects = self.analyzer.analyze_effects_usage(self.bright_audio, self.sr) - + # Check required effects expected_effects = ['reverb', 'distortion', 'chorus', 'compression'] for effect in expected_effects: @@ -243,15 +267,15 @@ def test_detect_reverb(self): """Test reverb detection.""" # Test with dry audio dry_reverb = self.analyzer._detect_reverb(self.bright_audio, self.sr) - + # Create audio with simulated reverb (add delayed copies) reverb_audio = self.bright_audio.copy() delay_samples = int(0.1 * self.sr) # 100ms delay if len(reverb_audio) > delay_samples: reverb_audio[delay_samples:] += 0.3 * reverb_audio[:-delay_samples] - + wet_reverb = self.analyzer._detect_reverb(reverb_audio, self.sr) - + # Both should be valid scores self.assertIsInstance(dry_reverb, (int, float)) self.assertIsInstance(wet_reverb, (int, float)) @@ -259,7 +283,7 @@ def test_detect_reverb(self): self.assertLessEqual(dry_reverb, 1.0) self.assertGreaterEqual(wet_reverb, 0.0) self.assertLessEqual(wet_reverb, 1.0) - + # Reverb audio should have higher reverb score self.assertGreater(wet_reverb, dry_reverb) @@ -267,11 +291,11 @@ def test_detect_distortion(self): """Test distortion detection.""" # Test with clean audio clean_distortion = self.analyzer._detect_distortion(self.bright_audio, self.sr) - + # Create distorted audio (clipping) distorted_audio = np.clip(3.0 * self.bright_audio, -1.0, 1.0) dirty_distortion = self.analyzer._detect_distortion(distorted_audio, self.sr) - + # Both should be valid scores self.assertIsInstance(clean_distortion, (int, float)) self.assertIsInstance(dirty_distortion, (int, float)) @@ -279,14 +303,14 @@ def test_detect_distortion(self): self.assertLessEqual(clean_distortion, 1.0) self.assertGreaterEqual(dirty_distortion, 0.0) self.assertLessEqual(dirty_distortion, 1.0) - + # Distorted audio should have higher distortion score self.assertGreater(dirty_distortion, clean_distortion) def test_detect_chorus(self): """Test chorus detection.""" chorus_score = self.analyzer._detect_chorus(self.bright_audio, self.sr) - + self.assertIsInstance(chorus_score, (int, float)) self.assertGreaterEqual(chorus_score, 0.0) self.assertLessEqual(chorus_score, 1.0) @@ -295,11 +319,11 @@ def test_detect_compression(self): """Test compression detection.""" # Test with dynamic audio dynamic_compression = self.analyzer._detect_compression(self.bright_audio) - + # Create compressed audio (reduce dynamic range) compressed_audio = np.tanh(2.0 * self.bright_audio) compressed_compression = self.analyzer._detect_compression(compressed_audio) - + # Both should be valid scores self.assertIsInstance(dynamic_compression, (int, float)) self.assertIsInstance(compressed_compression, (int, float)) @@ -307,7 +331,7 @@ def test_detect_compression(self): self.assertLessEqual(dynamic_compression, 1.0) self.assertGreaterEqual(compressed_compression, 0.0) self.assertLessEqual(compressed_compression, 1.0) - + # Compressed audio should have higher compression score self.assertGreater(compressed_compression, dynamic_compression) @@ -315,9 +339,14 @@ def test_analyze_texture(self): """Test texture analysis.""" features = self.analyzer.extract_timbral_features(self.dense_audio, self.sr) texture = self.analyzer.analyze_texture(features) - + # Check required fields - expected_fields = ['spectral_complexity', 'harmonic_richness', 'temporal_stability', 'timbral_consistency'] + expected_fields = [ + 
'spectral_complexity', + 'harmonic_richness', + 'temporal_stability', + 'timbral_consistency', + ] for field in expected_fields: self.assertIn(field, texture) self.assertIsInstance(texture[field], (int, float)) @@ -327,15 +356,23 @@ def test_analyze_texture(self): def test_analyze_complete(self): """Test complete timbre analysis.""" results = self.analyzer.analyze(self.bright_audio, self.sr) - + # Check main structure self.assertIsInstance(results, dict) - - expected_sections = ['spectral_features', 'brightness', 'roughness', 'warmth', - 'density', 'dominant_instruments', 'effects_usage', 'texture'] + + expected_sections = [ + 'spectral_features', + 'brightness', + 'roughness', + 'warmth', + 'density', + 'dominant_instruments', + 'effects_usage', + 'texture', + ] for section in expected_sections: self.assertIn(section, results) - + # Check specific values self.assertIsInstance(results['brightness'], (int, float)) self.assertIsInstance(results['roughness'], (int, float)) @@ -348,18 +385,17 @@ def test_analyze_complete(self): def test_analyze_with_progress_callback(self): """Test analysis with progress callback.""" progress_calls = [] - + def progress_callback(progress, message): progress_calls.append((progress, message)) - - results = self.analyzer.analyze( - self.bright_audio, self.sr, - progress_callback=progress_callback + + self.analyzer.analyze( + self.bright_audio, self.sr, progress_callback=progress_callback ) - + # Check that progress was reported self.assertGreater(len(progress_calls), 0) - + # Check progress values for progress, message in progress_calls: self.assertGreaterEqual(progress, 0.0) @@ -369,7 +405,7 @@ def progress_callback(progress, message): def test_empty_audio_handling(self): """Test handling of empty audio.""" empty_audio = np.array([]) - + try: results = self.analyzer.analyze(empty_audio, self.sr) self.assertIsInstance(results, dict) @@ -379,10 +415,12 @@ def test_empty_audio_handling(self): def test_short_audio_handling(self): """Test handling of very short audio.""" - short_audio = 0.5 * np.sin(2 * np.pi * 440 * np.linspace(0, 0.5, int(self.sr * 0.5))) - + short_audio = 0.5 * np.sin( + 2 * np.pi * 440 * np.linspace(0, 0.5, int(self.sr * 0.5)) + ) + results = self.analyzer.analyze(short_audio, self.sr) - + # Should handle short audio self.assertIsInstance(results, dict) self.assertIn('brightness', results) @@ -391,12 +429,12 @@ def test_short_audio_handling(self): def test_silence_handling(self): """Test handling of silent audio.""" silent_audio = np.zeros(self.sr * 2) - + results = self.analyzer.analyze(silent_audio, self.sr) - + # Should handle silence gracefully self.assertIsInstance(results, dict) - + # Brightness should be low for silence self.assertLess(results['brightness'], 0.1) @@ -404,7 +442,7 @@ def test_different_parameters(self): """Test analyzer with different parameters.""" custom_analyzer = TimbreAnalyzer(hop_length=256, n_fft=4096) results = custom_analyzer.analyze(self.bright_audio, self.sr) - + # Should produce valid results with different parameters self.assertIsInstance(results, dict) self.assertIn('brightness', results) @@ -415,10 +453,10 @@ def test_instrument_classification_consistency(self): # Run classification multiple times on same audio results1 = self.analyzer.classify_instruments(self.bright_audio, self.sr) results2 = self.analyzer.classify_instruments(self.bright_audio, self.sr) - + # Should return same number of instruments self.assertEqual(len(results1), len(results2)) - + # Should have similar instrument types (allowing for some 
variation) if len(results1) > 0 and len(results2) > 0: instruments1 = {inst['instrument'] for inst in results1} @@ -428,4 +466,4 @@ def test_instrument_classification_consistency(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()