From 2bf85f6c582ecf9080e1579213f471e00faac917 Mon Sep 17 00:00:00 2001 From: Vrishabh Date: Sat, 17 Jan 2026 11:12:25 +0000 Subject: [PATCH] Fix aggregate pushdown --- .../java/org/apache/iceberg/expressions/MaxAggregate.java | 6 ++++++ .../java/org/apache/iceberg/expressions/MinAggregate.java | 6 ++++++ .../org/apache/iceberg/spark/sql/TestAggregatePushDown.java | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/MaxAggregate.java b/api/src/main/java/org/apache/iceberg/expressions/MaxAggregate.java index d37af7470df2..ea9f9ab78e84 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/MaxAggregate.java +++ b/api/src/main/java/org/apache/iceberg/expressions/MaxAggregate.java @@ -40,6 +40,12 @@ protected MaxAggregate(BoundTerm term) { @Override protected boolean hasValue(DataFile file) { + // If nanValueCounts > 0 , it could be -NaN or NaN and hence we cant conclude the max value is + // NaN + Long nanCount = safeGet(file.nanValueCounts(), fieldId); + if (nanCount != null && nanCount > 0) { + return false; + } boolean hasBound = safeContainsKey(file.upperBounds(), fieldId); Long valueCount = safeGet(file.valueCounts(), fieldId); Long nullCount = safeGet(file.nullValueCounts(), fieldId); diff --git a/api/src/main/java/org/apache/iceberg/expressions/MinAggregate.java b/api/src/main/java/org/apache/iceberg/expressions/MinAggregate.java index 667b66d6500d..5498124a9ad0 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/MinAggregate.java +++ b/api/src/main/java/org/apache/iceberg/expressions/MinAggregate.java @@ -40,6 +40,12 @@ protected MinAggregate(BoundTerm term) { @Override protected boolean hasValue(DataFile file) { + // If nanValueCounts > 0 , it could be -NaN or NaN and hence we cant conclude the min value is + // -NaN + Long nanCount = safeGet(file.nanValueCounts(), fieldId); + if (nanCount != null && nanCount > 0) { + return false; + } boolean hasBound = safeContainsKey(file.lowerBounds(), fieldId); Long valueCount = safeGet(file.valueCounts(), fieldId); Long nullCount = safeGet(file.nullValueCounts(), fieldId); diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java index ce0a0f26a096..5b4f04d12256 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestAggregatePushDown.java @@ -740,6 +740,7 @@ public void testNaN() { sql( "INSERT INTO %s VALUES (1, float('nan'))," + "(1, float('nan')), " + + "(1, 10.0), " + "(2, 2), " + "(2, float('nan')), " + "(3, float('nan')), " @@ -762,7 +763,7 @@ public void testNaN() { List actual = sql(select, tableName); List expected = Lists.newArrayList(); - expected.add(new Object[] {6L, Float.NaN, 1.0F, 6L}); + expected.add(new Object[] {7L, Float.NaN, 1.0F, 7L}); assertEquals("expected and actual should equal", expected, actual); }