diff --git a/Cargo.lock b/Cargo.lock index be4ffdc..1130ab2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "drainage" -version = "0.1.0" +version = "0.1.7" dependencies = [ "anyhow", "aws-config", diff --git a/Cargo.toml b/Cargo.toml index c21bcb0..0f07b29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "drainage" -version = "0.1.0" +version = "0.1.7" edition = "2021" [lib] diff --git a/examples/analyze_any_table.py b/examples/analyze_any_table.py index f064bd4..775472c 100644 --- a/examples/analyze_any_table.py +++ b/examples/analyze_any_table.py @@ -167,10 +167,12 @@ def analyze_any_table( print(" python analyze_any_table.py s3://my-bucket/my-table us-west-2") print(" # Specify table type explicitly") print( - " python analyze_any_table.py s3://my-bucket/my-delta-table delta us-west-2" + " python analyze_any_table.py s3://my-bucket/my-delta-table delta " + "us-west-2" ) print( - " python analyze_any_table.py s3://my-bucket/my-iceberg-table iceberg us-west-2" + " python analyze_any_table.py s3://my-bucket/my-iceberg-table " + "iceberg us-west-2" ) sys.exit(1) diff --git a/examples/analyze_iceberg_table.py b/examples/analyze_iceberg_table.py index 598e14a..fcad9e4 100644 --- a/examples/analyze_iceberg_table.py +++ b/examples/analyze_iceberg_table.py @@ -147,7 +147,8 @@ def analyze_iceberg_table(s3_path: str, aws_region: str = "us-west-2"): print("Usage: python analyze_iceberg_table.py [aws_region]") print("\nExample:") print( - " python analyze_iceberg_table.py s3://my-bucket/my-iceberg-table us-west-2" + " python analyze_iceberg_table.py s3://my-bucket/my-iceberg-table " + "us-west-2" ) sys.exit(1) diff --git a/examples/monitor_multiple_tables.py b/examples/monitor_multiple_tables.py index 2e8b6e6..b03eefd 100644 --- a/examples/monitor_multiple_tables.py +++ b/examples/monitor_multiple_tables.py @@ -81,7 +81,8 @@ def monitor_tables(tables: List[Tuple[str, str]], aws_region: str = "us-west-2") print("Table Health Overview (sorted by health score):") print(f"{'─'*80}") print( - f"{'Path':<35} {'Type':<8} {'Health':<8} {'Files':<8} {'Size(GB)':<10} {'Issues'}" + f"{'Path':<35} {'Type':<8} {'Health':<8} {'Files':<8} " + f"{'Size(GB)':<10} {'Issues'}" ) print(f"{'─'*80}") @@ -95,8 +96,9 @@ def monitor_tables(tables: List[Tuple[str, str]], aws_region: str = "us-west-2") ) path_short = r["path"][-35:] if len(r["path"]) > 35 else r["path"] print( - f"{path_short:<35} {r['type']:<8} {health_emoji} {r['health_score']:.1%} " - f"{r['total_files']:<8} {r['total_size_gb']:<10.2f} {len(r['recommendations'])}" + f"{path_short:<35} {r['type']:<8} {health_emoji} " + f"{r['health_score']:.1%} {r['total_files']:<8} " + f"{r['total_size_gb']:<10.2f} {len(r['recommendations'])}" ) print() @@ -161,5 +163,7 @@ def monitor_tables(tables: List[Tuple[str, str]], aws_region: str = "us-west-2") # Optional: Save results to a file # import json - # with open(f"health_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", "w") as f: + # with open( + # f"health_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", "w" + # ) as f: # json.dump(results, f, indent=2) diff --git a/pyproject.toml b/pyproject.toml index 0585704..0ea2463 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "drainage" -version = "0.1.4" +version = "0.1.7" description = "High-performance data lake health analyzer for Delta Lake and Apache Iceberg" readme = "README.md" requires-python = ">=3.8" diff --git a/src/iceberg.rs b/src/iceberg.rs index 01c5530..15b0ab8 100644 --- a/src/iceberg.rs +++ b/src/iceberg.rs @@ -365,7 +365,7 @@ impl IcebergAnalyzer { fn generate_recommendations(&self, metrics: &mut HealthMetrics) { // Add warnings about incomplete analysis sections let mut incomplete_sections = Vec::new(); - + if metrics.schema_evolution.is_none() { incomplete_sections.push("Schema Evolution"); } @@ -378,14 +378,14 @@ impl IcebergAnalyzer { if metrics.file_compaction.is_none() { incomplete_sections.push("File Compaction"); } - + if !incomplete_sections.is_empty() { metrics.recommendations.push(format!( "⚠️ Analysis incomplete: {} sections could not be analyzed due to missing/inaccessible metadata files (common in actively updated tables). Basic metrics are still accurate.", incomplete_sections.join(", ") )); } - + // Check for unreferenced files if !metrics.unreferenced_files.is_empty() { metrics.recommendations.push(format!( @@ -726,7 +726,7 @@ impl IcebergAnalyzer { Ok(c) => c, Err(_) => continue, }; - + let metadata: Value = match serde_json::from_slice(&content) { Ok(m) => m, Err(_) => continue, @@ -966,7 +966,7 @@ impl IcebergAnalyzer { Ok(c) => c, Err(_) => continue, }; - + let metadata: Value = match serde_json::from_slice(&content) { Ok(m) => m, Err(_) => continue, @@ -1133,7 +1133,7 @@ impl IcebergAnalyzer { Ok(c) => c, Err(_) => continue, }; - + let metadata: Value = match serde_json::from_slice(&content) { Ok(m) => m, Err(_) => continue, @@ -1402,7 +1402,7 @@ impl IcebergAnalyzer { Ok(c) => c, Err(_) => continue, }; - + let metadata: Value = match serde_json::from_slice(&content) { Ok(m) => m, Err(_) => continue,