From ffb57244d227305c0872bcebc91da38ac44026d7 Mon Sep 17 00:00:00 2001 From: "zor@zurich.ibm.com" Date: Wed, 17 Dec 2025 22:55:43 +0100 Subject: [PATCH] feat(isthmus): configurable fallback to dynamic function mapping --- isthmus/build.gradle.kts | 1 + .../io/substrait/isthmus/FeatureBoard.java | 18 ++ .../isthmus/SimpleExtensionToSqlOperator.java | 40 +++- .../io/substrait/isthmus/SqlToSubstrait.java | 42 +++- .../isthmus/SubstraitRelNodeConverter.java | 126 ++++++++++-- .../isthmus/SubstraitRelVisitor.java | 96 ++++++++- .../isthmus/expression/FunctionConverter.java | 55 ++++- .../IgnoreNullableAndParameters.java | 39 +++- ...oFallbackDynamicFunctionRoundtripTest.java | 194 ++++++++++++++++++ .../io/substrait/isthmus/PlanTestBase.java | 71 ++++++- 10 files changed, 641 insertions(+), 41 deletions(-) create mode 100644 isthmus/src/test/java/io/substrait/isthmus/AutoFallbackDynamicFunctionRoundtripTest.java diff --git a/isthmus/build.gradle.kts b/isthmus/build.gradle.kts index ad6ce6230..37a5d8fd1 100644 --- a/isthmus/build.gradle.kts +++ b/isthmus/build.gradle.kts @@ -105,6 +105,7 @@ dependencies { testImplementation(platform(libs.junit.bom)) testImplementation(libs.junit.jupiter) testRuntimeOnly(libs.junit.platform.launcher) + testRuntimeOnly(libs.slf4j.jdk14) implementation(libs.guava) implementation(libs.protobuf.java.util) { exclude("com.google.guava", "guava") diff --git a/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java b/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java index a54f24146..c303ebbfb 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java +++ b/isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java @@ -32,4 +32,22 @@ public Casing unquotedCasing() { public boolean allowDynamicUdfs() { return false; } + + /** + * Controls whether to automatically create mappings for all unmapped functions using + * SimpleExtensionToSqlOperator. + * + *

When enabled, functions from extension YAML files that are not explicitly mapped in + * FunctionMappings will be automatically mapped to Calcite SqlOperators. This allows custom and + * dynamic functions to be used in SQL queries without manual mapping configuration. + * + *

This feature is disabled by default for backward compatibility. + * + * @return true if automatic fallback to dynamic function mapping should be enabled; false + * otherwise (default) + */ + @Value.Default + public boolean autoFallbackToDynamicFunctionMapping() { + return false; + } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index 3c61acd94..fbafc78b8 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -45,9 +45,45 @@ public static List from( SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory, TypeConverter typeConverter) { - // TODO: add support for windows functions return Stream.concat( - collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()) + Stream.concat( + collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()), + collection.windowFunctions().stream()) + .map(function -> toSqlFunction(function, typeFactory, typeConverter)) + .collect(Collectors.toList()); + } + + /** + * Converts a list of functions to SqlOperators. Handles scalar, aggregate, and window functions. + * + * @param functions list of functions to convert + * @param typeFactory the Calcite type factory + * @return list of SqlOperators + */ + public static List from( + List functions, RelDataTypeFactory typeFactory) { + return from(functions, typeFactory, TypeConverter.DEFAULT); + } + + /** + * Converts a list of functions to SqlOperators. Handles scalar, aggregate, and window functions. + * + *

Each function variant is converted to a separate SqlOperator. Functions with the same base + * name but different type signatures (e.g., strftime:ts_str, strftime:ts_string) are ALL added to + * the operator table. Calcite will try to match the function call arguments against all available + * operators and select the one that matches. This allows functions with multiple signatures to be + * used correctly without explicit deduplication. + * + * @param functions list of functions to convert + * @param typeFactory the Calcite type factory + * @param typeConverter the type converter + * @return list of SqlOperators + */ + public static List from( + List functions, + RelDataTypeFactory typeFactory, + TypeConverter typeConverter) { + return functions.stream() .map(function -> toSqlFunction(function, typeFactory, typeConverter)) .collect(Collectors.toList()); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index e60494244..4085c4ca9 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -32,6 +32,8 @@ public SqlToSubstrait(FeatureBoard features) { public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { super(features, extensions); + List dynamicOperators = new java.util.ArrayList<>(); + if (featureBoard.allowDynamicUdfs()) { SimpleExtension.ExtensionCollection dynamicExtensionCollection = ExtensionUtils.getDynamicExtensions(extensions); @@ -39,13 +41,43 @@ public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoa || !dynamicExtensionCollection.aggregateFunctions().isEmpty()) { List generatedDynamicOperators = SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory); - this.operatorTable = - SqlOperatorTables.chain( - SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(generatedDynamicOperators)); - return; + dynamicOperators.addAll(generatedDynamicOperators); + } + } + + if (featureBoard.autoFallbackToDynamicFunctionMapping()) { + List unmappedScalars = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.scalarFunctions(), + io.substrait.isthmus.expression.FunctionMappings.SCALAR_SIGS); + List unmappedAggregates = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.aggregateFunctions(), + io.substrait.isthmus.expression.FunctionMappings.AGGREGATE_SIGS); + List unmappedWindows = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.windowFunctions(), + io.substrait.isthmus.expression.FunctionMappings.WINDOW_SIGS); + + if (!unmappedScalars.isEmpty()) { + dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedScalars, this.factory)); } + if (!unmappedAggregates.isEmpty()) { + dynamicOperators.addAll( + SimpleExtensionToSqlOperator.from(unmappedAggregates, this.factory)); + } + if (!unmappedWindows.isEmpty()) { + dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedWindows, this.factory)); + } + } + + if (!dynamicOperators.isEmpty()) { + this.operatorTable = + SqlOperatorTables.chain( + SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(dynamicOperators)); + } else { + this.operatorTable = SubstraitOperatorTable.INSTANCE; } - this.operatorTable = SubstraitOperatorTable.INSTANCE; } /** diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 47daf97e2..b44e06d07 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -87,6 +87,8 @@ import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * RelVisitor to convert Substrait Rel plan to Calcite RelNode plan. Unsupported Rel node will call @@ -95,6 +97,8 @@ public class SubstraitRelNodeConverter extends AbstractRelVisitor { + private static final Logger LOGGER = LoggerFactory.getLogger(SubstraitRelNodeConverter.class); + protected final RelDataTypeFactory typeFactory; protected final ScalarFunctionConverter scalarFunctionConverter; @@ -120,9 +124,9 @@ public SubstraitRelNodeConverter( this( typeFactory, relBuilder, - createScalarFunctionConverter(extensions, typeFactory, featureBoard.allowDynamicUdfs()), - new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory), - new WindowFunctionConverter(extensions.windowFunctions(), typeFactory), + createScalarFunctionConverter(extensions, typeFactory, featureBoard), + createAggregateFunctionConverter(extensions, typeFactory, featureBoard), + createWindowFunctionConverter(extensions, typeFactory, featureBoard), TypeConverter.DEFAULT); } @@ -165,11 +169,11 @@ public SubstraitRelNodeConverter( private static ScalarFunctionConverter createScalarFunctionConverter( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, - boolean allowDynamicUdfs) { + FeatureBoard featureBoard) { - List additionalSignatures; + List additionalSignatures = new ArrayList<>(); - if (allowDynamicUdfs) { + if (featureBoard.allowDynamicUdfs()) { java.util.Set knownFunctionNames = FunctionMappings.SCALAR_SIGS.stream() .map(FunctionMappings.Sig::name) @@ -180,28 +184,124 @@ private static ScalarFunctionConverter createScalarFunctionConverter( .filter(f -> !knownFunctionNames.contains(f.name().toLowerCase())) .collect(Collectors.toList()); - if (dynamicFunctions.isEmpty()) { - additionalSignatures = Collections.emptyList(); - } else { + if (!dynamicFunctions.isEmpty()) { SimpleExtension.ExtensionCollection dynamicExtensionCollection = SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build(); List dynamicOperators = SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); - additionalSignatures = + additionalSignatures.addAll( dynamicOperators.stream() .map(op -> FunctionMappings.s(op, op.getName())) - .collect(Collectors.toList()); + .collect(Collectors.toList())); + } + } + + if (featureBoard.autoFallbackToDynamicFunctionMapping()) { + List unmappedFunctions = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.scalarFunctions(), FunctionMappings.SCALAR_SIGS); + + if (!unmappedFunctions.isEmpty()) { + LOGGER.info( + "Dynamically mapping {} unmapped scalar functions: {}", + unmappedFunctions.size(), + unmappedFunctions.stream().map(f -> f.name()).collect(Collectors.toList())); + + List dynamicOperators = + SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory); + + // Note: We use last-wins deduplication here because: + // 1. Multiple variants of the same function create separate SqlOperator instances + // 2. Calcite's SqlOperator equality is based on name and kind, not identity + // 3. RexCalls may use any one of these equivalent operators + // 4. We only need ONE SqlOperator registered per function name as a key in signatures map + // 5. The FunctionFinder will match all variants based on type signatures + java.util.Map operatorsByName = new java.util.LinkedHashMap<>(); + for (SqlOperator op : dynamicOperators) { + operatorsByName.put(op.getName().toLowerCase(), op); + } + + additionalSignatures.addAll( + operatorsByName.values().stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList())); } - } else { - additionalSignatures = Collections.emptyList(); } return new ScalarFunctionConverter( extensions.scalarFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT); } + private static AggregateFunctionConverter createAggregateFunctionConverter( + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + FeatureBoard featureBoard) { + + List additionalSignatures = new ArrayList<>(); + + if (featureBoard.autoFallbackToDynamicFunctionMapping()) { + List unmappedFunctions = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.aggregateFunctions(), FunctionMappings.AGGREGATE_SIGS); + + if (!unmappedFunctions.isEmpty()) { + List dynamicOperators = + SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory); + + // Deduplicate operators by name (last-wins precedence) since multiple variants + // of the same function create multiple SqlOperator objects + java.util.Map operatorsByName = new java.util.LinkedHashMap<>(); + for (SqlOperator op : dynamicOperators) { + operatorsByName.put(op.getName().toLowerCase(), op); + } + + additionalSignatures.addAll( + operatorsByName.values().stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList())); + } + } + + return new AggregateFunctionConverter( + extensions.aggregateFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT); + } + + private static WindowFunctionConverter createWindowFunctionConverter( + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + FeatureBoard featureBoard) { + + List additionalSignatures = new ArrayList<>(); + + if (featureBoard.autoFallbackToDynamicFunctionMapping()) { + List unmappedFunctions = + io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions( + extensions.windowFunctions(), FunctionMappings.WINDOW_SIGS); + + if (!unmappedFunctions.isEmpty()) { + List dynamicOperators = + SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory); + + // Deduplicate operators by name (last-wins precedence) since multiple variants + // of the same function create multiple SqlOperator objects + java.util.Map operatorsByName = new java.util.LinkedHashMap<>(); + for (SqlOperator op : dynamicOperators) { + operatorsByName.put(op.getName().toLowerCase(), op); + } + + additionalSignatures.addAll( + operatorsByName.values().stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList())); + } + } + + return new WindowFunctionConverter( + extensions.windowFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT); + } + public static RelNode convert( Rel relRoot, RelOptCluster relOptCluster, diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index 835d8493d..b9357d944 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -9,6 +9,7 @@ import io.substrait.isthmus.calcite.rel.CreateView; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.CallConverters; +import io.substrait.isthmus.expression.FunctionConverter; import io.substrait.isthmus.expression.FunctionMappings; import io.substrait.isthmus.expression.LiteralConverter; import io.substrait.isthmus.expression.RexExpressionConverter; @@ -67,11 +68,14 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.ImmutableBitSet; import org.immutables.value.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @SuppressWarnings("UnstableApiUsage") @Value.Enclosing public class SubstraitRelVisitor extends RelNodeVisitor { + private static final Logger LOGGER = LoggerFactory.getLogger(SubstraitRelVisitor.class); private static final FeatureBoard FEATURES_DEFAULT = ImmutableFeatureBoard.builder().build(); private static final Expression.BoolLiteral TRUE = ExpressionCreator.bool(false, true); @@ -95,20 +99,43 @@ public SubstraitRelVisitor( ArrayList converters = new ArrayList<>(); converters.addAll(CallConverters.defaults(typeConverter)); + // Handle scalar functions + List scalarAdditionalSignatures = new ArrayList<>(); + if (features.allowDynamicUdfs()) { SimpleExtension.ExtensionCollection dynamicExtensionCollection = ExtensionUtils.getDynamicExtensions(extensions); List dynamicOperators = SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory); - List additionalSignatures = + List udfSignatures = dynamicOperators.stream() .map(op -> FunctionMappings.s(op, op.getName())) .collect(Collectors.toList()); + scalarAdditionalSignatures.addAll(udfSignatures); + } + + if (features.autoFallbackToDynamicFunctionMapping()) { + List unmappedScalars = + FunctionConverter.getUnmappedFunctions( + extensions.scalarFunctions(), FunctionMappings.SCALAR_SIGS); + if (!unmappedScalars.isEmpty()) { + List unmappedOperators = + SimpleExtensionToSqlOperator.from(unmappedScalars, typeFactory); + List unmappedSignatures = + unmappedOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList()); + scalarAdditionalSignatures.addAll(unmappedSignatures); + LOGGER.debug("Dynamically mapped {} unmapped scalar functions", unmappedSignatures.size()); + } + } + + if (!scalarAdditionalSignatures.isEmpty()) { converters.add( new ScalarFunctionConverter( extensions.scalarFunctions(), - additionalSignatures, + scalarAdditionalSignatures, typeFactory, TypeConverter.DEFAULT)); } else { @@ -116,10 +143,67 @@ public SubstraitRelVisitor( } converters.add(CallConverters.CREATE_SEARCH_CONV.apply(new RexBuilder(typeFactory))); - this.aggregateFunctionConverter = - new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory); - WindowFunctionConverter windowFunctionConverter = - new WindowFunctionConverter(extensions.windowFunctions(), typeFactory); + + // Handle aggregate functions + AggregateFunctionConverter aggregateFunctionConverter; + if (features.autoFallbackToDynamicFunctionMapping()) { + List unmappedAggregates = + FunctionConverter.getUnmappedFunctions( + extensions.aggregateFunctions(), FunctionMappings.AGGREGATE_SIGS); + if (!unmappedAggregates.isEmpty()) { + List unmappedOperators = + SimpleExtensionToSqlOperator.from(unmappedAggregates, typeFactory); + List unmappedSignatures = + unmappedOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList()); + aggregateFunctionConverter = + new AggregateFunctionConverter( + extensions.aggregateFunctions(), + unmappedSignatures, + typeFactory, + TypeConverter.DEFAULT); + LOGGER.debug( + "Dynamically mapped {} unmapped aggregate functions", unmappedSignatures.size()); + } else { + aggregateFunctionConverter = + new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory); + } + } else { + aggregateFunctionConverter = + new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory); + } + + this.aggregateFunctionConverter = aggregateFunctionConverter; + + // Handle window functions + WindowFunctionConverter windowFunctionConverter; + if (features.autoFallbackToDynamicFunctionMapping()) { + List unmappedWindows = + FunctionConverter.getUnmappedFunctions( + extensions.windowFunctions(), FunctionMappings.WINDOW_SIGS); + if (!unmappedWindows.isEmpty()) { + List unmappedOperators = + SimpleExtensionToSqlOperator.from(unmappedWindows, typeFactory); + List unmappedSignatures = + unmappedOperators.stream() + .map(op -> FunctionMappings.s(op, op.getName().toLowerCase())) + .collect(Collectors.toList()); + windowFunctionConverter = + new WindowFunctionConverter( + extensions.windowFunctions(), + unmappedSignatures, + typeFactory, + TypeConverter.DEFAULT); + LOGGER.debug("Dynamically mapped {} unmapped window functions", unmappedSignatures.size()); + } else { + windowFunctionConverter = + new WindowFunctionConverter(extensions.windowFunctions(), typeFactory); + } + } else { + windowFunctionConverter = + new WindowFunctionConverter(extensions.windowFunctions(), typeFactory); + } this.rexExpressionConverter = new RexExpressionConverter(this, converters, windowFunctionConverter, typeConverter); this.featureBoard = features; diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index b5604d4d9..303399fd4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -240,9 +240,30 @@ private Optional signatureMatch(List inputTypes, Type outputType) { for (F function : functions) { List args = function.requiredArguments(); // Make sure that arguments & return are within bounds and match the types - if (function.returnType() instanceof ParameterizedType - && isMatch(outputType, (ParameterizedType) function.returnType()) - && inputTypesMatchDefinedArguments(inputTypes, args)) { + boolean returnTypeMatches; + Object funcReturnType = function.returnType(); + + if (funcReturnType instanceof ParameterizedType) { + returnTypeMatches = isMatch(outputType, (ParameterizedType) funcReturnType); + } else if (funcReturnType instanceof Type) { + // For non-parameterized return types, check if they match + Type targetType = (Type) funcReturnType; + if (outputType instanceof ParameterizedType) { + // outputType is parameterized but targetType is not - use visitor pattern + returnTypeMatches = + ((ParameterizedType) outputType) + .accept(new IgnoreNullableAndParameters(targetType)); + } else { + // Both are non-parameterized types - compare them directly by using the visitor + // Create a simple visitor that just checks class equality + returnTypeMatches = outputType.getClass().equals(targetType.getClass()); + } + } else { + // If function.returnType() is neither Type nor ParameterizedType, skip it + returnTypeMatches = false; + } + + if (returnTypeMatches && inputTypesMatchDefinedArguments(inputTypes, args)) { return Optional.of(function); } } @@ -476,6 +497,13 @@ public Optional attemptMatch(C call, Function topLevelCo if (leastRestrictive.isPresent()) { return leastRestrictive; } + } else { + // Fallback: try matchCoerced even if singularInputType is empty + // This handles functions with mixed argument types like strftime(timestamp, string) + Optional coerced = matchCoerced(call, outputType, operands); + if (coerced.isPresent()) { + return coerced; + } } return Optional.empty(); } @@ -565,4 +593,25 @@ private static boolean isMatch(ParameterizedType actualType, ParameterizedType t } return actualType.accept(new IgnoreNullableAndParameters(targetType)); } + + /** + * Identifies functions that are not mapped in the provided Sig list. + * + * @param functions the list of function variants to check + * @param sigs the list of mapped Sig signatures + * @return a list of functions that are not found in the Sig mappings (case-insensitive name + * comparison) + */ + public static List getUnmappedFunctions( + List functions, ImmutableList sigs) { + Set mappedNames = + sigs.stream() + .map(FunctionMappings.Sig::name) + .map(name -> name.toLowerCase(Locale.ROOT)) + .collect(Collectors.toSet()); + + return functions.stream() + .filter(fn -> !mappedNames.contains(fn.name().toLowerCase(Locale.ROOT))) + .collect(Collectors.toList()); + } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java b/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java index f8b4be1dd..7ad7380cd 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java @@ -50,7 +50,12 @@ public Boolean visit(Type.FP64 type) { @Override public Boolean visit(Type.Str type) { - return typeToMatch instanceof Type.Str; + // Treat all string types as compatible: Str, VarChar, and FixedChar + return typeToMatch instanceof Type.Str + || typeToMatch instanceof Type.VarChar + || typeToMatch instanceof Type.FixedChar + || typeToMatch instanceof ParameterizedType.VarChar + || typeToMatch instanceof ParameterizedType.FixedChar; } @Override @@ -108,13 +113,22 @@ public Boolean visit(Type.UserDefined type) throws RuntimeException { @Override public Boolean visit(Type.FixedChar type) { + // Treat all string types as compatible: Str, VarChar, and FixedChar return typeToMatch instanceof Type.FixedChar - || typeToMatch instanceof ParameterizedType.FixedChar; + || typeToMatch instanceof ParameterizedType.FixedChar + || typeToMatch instanceof Type.Str + || typeToMatch instanceof Type.VarChar + || typeToMatch instanceof ParameterizedType.VarChar; } @Override public Boolean visit(Type.VarChar type) { - return typeToMatch instanceof Type.VarChar || typeToMatch instanceof ParameterizedType.VarChar; + // Treat all string types as compatible: Str, VarChar, and FixedChar + return typeToMatch instanceof Type.VarChar + || typeToMatch instanceof ParameterizedType.VarChar + || typeToMatch instanceof Type.Str + || typeToMatch instanceof Type.FixedChar + || typeToMatch instanceof ParameterizedType.FixedChar; } @Override @@ -131,18 +145,21 @@ public Boolean visit(Type.Decimal type) { @Override public Boolean visit(Type.PrecisionTime type) { return typeToMatch instanceof Type.PrecisionTime + || typeToMatch instanceof Type.Time || typeToMatch instanceof ParameterizedType.PrecisionTime; } @Override public Boolean visit(Type.PrecisionTimestamp type) { return typeToMatch instanceof Type.PrecisionTimestamp + || typeToMatch instanceof Type.Timestamp || typeToMatch instanceof ParameterizedType.PrecisionTimestamp; } @Override public Boolean visit(Type.PrecisionTimestampTZ type) { return typeToMatch instanceof Type.PrecisionTimestampTZ + || typeToMatch instanceof Type.TimestampTZ || typeToMatch instanceof ParameterizedType.PrecisionTimestampTZ; } @@ -164,13 +181,22 @@ public Boolean visit(Type.Map type) { @Override public Boolean visit(ParameterizedType.FixedChar expr) throws RuntimeException { + // Treat all string types as compatible: Str, VarChar, and FixedChar return typeToMatch instanceof Type.FixedChar - || typeToMatch instanceof ParameterizedType.FixedChar; + || typeToMatch instanceof ParameterizedType.FixedChar + || typeToMatch instanceof Type.Str + || typeToMatch instanceof Type.VarChar + || typeToMatch instanceof ParameterizedType.VarChar; } @Override public Boolean visit(ParameterizedType.VarChar expr) throws RuntimeException { - return typeToMatch instanceof Type.VarChar || typeToMatch instanceof ParameterizedType.VarChar; + // Treat all string types as compatible: Str, VarChar, and FixedChar + return typeToMatch instanceof Type.VarChar + || typeToMatch instanceof ParameterizedType.VarChar + || typeToMatch instanceof Type.Str + || typeToMatch instanceof Type.FixedChar + || typeToMatch instanceof ParameterizedType.FixedChar; } @Override @@ -199,18 +225,21 @@ public Boolean visit(ParameterizedType.IntervalCompound expr) throws RuntimeExce @Override public Boolean visit(ParameterizedType.PrecisionTime expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTime + || typeToMatch instanceof Type.Time || typeToMatch instanceof ParameterizedType.PrecisionTime; } @Override public Boolean visit(ParameterizedType.PrecisionTimestamp expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTimestamp + || typeToMatch instanceof Type.Timestamp || typeToMatch instanceof ParameterizedType.PrecisionTimestamp; } @Override public Boolean visit(ParameterizedType.PrecisionTimestampTZ expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTimestampTZ + || typeToMatch instanceof Type.TimestampTZ || typeToMatch instanceof ParameterizedType.PrecisionTimestampTZ; } diff --git a/isthmus/src/test/java/io/substrait/isthmus/AutoFallbackDynamicFunctionRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/AutoFallbackDynamicFunctionRoundtripTest.java new file mode 100644 index 000000000..00b1c12d3 --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/AutoFallbackDynamicFunctionRoundtripTest.java @@ -0,0 +1,194 @@ +package io.substrait.isthmus; + +import io.substrait.expression.ExpressionCreator; +import io.substrait.extension.DefaultExtensionCatalog; +import io.substrait.plan.Plan; +import io.substrait.relation.NamedScan; +import io.substrait.relation.Project; +import java.util.List; +import org.junit.jupiter.api.Test; + +/** + * Roundtrip test for the autoFallbackToDynamicFunctionMapping feature. + * + *

This test verifies that: 1. Substrait plans using unmapped functions (like strftime from + * extensions) are built with SubstraitBuilder 2. With autoFallbackToDynamicFunctionMapping enabled, + * these unmapped functions are dynamically mapped to Calcite operators 3. The roundtrip conversion + * (Substrait → Calcite → Substrait) is stable, including for SQL queries + * + *

The test uses unmapped datetime functions like strftime that are defined in extension YAML but + * not in FunctionMappings. + */ +class AutoFallbackDynamicFunctionRoundtripTest extends PlanTestBase { + + AutoFallbackDynamicFunctionRoundtripTest() { + super(DefaultExtensionCatalog.DEFAULT_COLLECTION); + } + + /** + * Test roundtrip with unmapped strftime function using SubstraitBuilder. + * + *

This test builds a Substrait plan using SubstraitBuilder that calls the unmapped + * strftime:ts_str function (which exists in functions_datetime.yaml but NOT in FunctionMappings). + */ + @Test + void testUnmappedStrftimeRoundtrip() { + + // Build table scan + NamedScan table = substraitBuilder.namedScan(List.of("t"), List.of("ts"), List.of(R.TIMESTAMP)); + + // Create project with unmapped strftime function call + Project project = + substraitBuilder.project( + input -> + List.of( + // Call unmapped strftime function: strftime(ts, '%Y-%m-%d') + substraitBuilder.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strftime:ts_str", + R.STRING, + substraitBuilder.fieldReference(input, 0), + ExpressionCreator.string(false, "%Y-%m-%d"))), + table); + + // Build plan with output field names + // Note: Project creates a struct with 2 fields internally, so we need 2 names + Plan plan = + Plan.builder() + .roots( + List.of( + Plan.Root.builder() + .input(project) + .names(List.of("formatted_date", "_ignored")) + .build())) + .build(); + + // Enable autoFallbackToDynamicFunctionMapping + FeatureBoard featureBoard = + ImmutableFeatureBoard.builder().autoFallbackToDynamicFunctionMapping(true).build(); + + // Use PlanTestBase helper method for comprehensive roundtrip testing with FeatureBoard + assertFullRoundTrip(plan.getRoots().get(0), featureBoard); + } + + /** + * Test roundtrip with multiple unmapped functions from default extensions. + * + *

This test builds Substrait plans with multiple unmapped functions from the default + * extensions (functions_datetime.yaml) using SubstraitBuilder, then verifies that + * autoFallbackToDynamicFunctionMapping enables dynamic mapping of these functions during + * Substrait → Calcite → Substrait roundtrip conversions. + */ + @Test + void testMultipleUnmappedFunctionsRoundtrip() { + // Build table scan + NamedScan table = substraitBuilder.namedScan(List.of("t"), List.of("ts"), List.of(R.TIMESTAMP)); + + // Create project with multiple unmapped strftime function calls from default extensions + Project project = + substraitBuilder.project( + input -> + List.of( + // First unmapped strftime call: strftime(ts, '%Y-%m-%d') + substraitBuilder.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strftime:ts_str", + R.STRING, + substraitBuilder.fieldReference(input, 0), + ExpressionCreator.string(false, "%Y-%m-%d")), + // Second unmapped strftime call: strftime(ts, '%H:%M:%S') + substraitBuilder.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strftime:ts_str", + R.STRING, + substraitBuilder.fieldReference(input, 0), + ExpressionCreator.string(false, "%H:%M:%S"))), + table); + + // Build plan with output field names + // Project creates a struct with 3 fields internally (input field + 2 output fields) + Plan plan = + Plan.builder() + .roots( + List.of( + Plan.Root.builder() + .input(project) + .names(List.of("date_str", "time_str", "_ignored")) + .build())) + .build(); + + // Enable autoFallbackToDynamicFunctionMapping + FeatureBoard featureBoard = + ImmutableFeatureBoard.builder().autoFallbackToDynamicFunctionMapping(true).build(); + + // Use PlanTestBase helper method for comprehensive roundtrip testing with FeatureBoard + assertFullRoundTrip(plan.getRoots().get(0), featureBoard); + } + + /** + * Test roundtrip with SQL query using unmapped strftime function. + * + *

This test verifies that SQL queries with unmapped functions (strftime from + * functions_datetime.yaml) can be parsed, validated, and converted to Substrait and back when + * autoFallbackToDynamicFunctionMapping is enabled. The feature flag should enable the + * SqlToSubstrait converter to populate its operator table with unmapped function signatures, + * allowing SqlValidator to accept them during SQL parsing. + */ + @Test + void testUnmappedStrftimeSqlRoundtrip() throws Exception { + String createStatements = "CREATE TABLE t (ts TIMESTAMP)"; + String query = "SELECT strftime(ts, '%Y-%m-%d') FROM t"; + + // Enable autoFallbackToDynamicFunctionMapping to populate operator table with unmapped + // functions + FeatureBoard featureBoard = + ImmutableFeatureBoard.builder().autoFallbackToDynamicFunctionMapping(true).build(); + + // Perform roundtrip: SQL → Substrait → Calcite → Substrait → Calcite → Substrait + // Uses loose POJO comparison since dynamic function mapping may transform the structure + assertSqlSubstraitRelRoundTripLoosePojoComparison(query, createStatements, featureBoard); + } + + /** + * Test roundtrip with SQL query using multiple unmapped functions. + * + *

This test verifies that SQL queries with multiple unmapped function calls can be handled + * when autoFallbackToDynamicFunctionMapping is enabled. The operator table is populated with + * unmapped function signatures, allowing all occurrences of the unmapped functions to be + * recognized during SQL parsing and conversion. + */ + @Test + void testMultipleUnmappedFunctionsSqlRoundtrip() throws Exception { + String createStatements = "CREATE TABLE t (date_str VARCHAR, ts_str VARCHAR)"; + String query = + "SELECT strptime_date(date_str, '%Y-%m-%d') AS parsed_date, strptime_timestamp(ts_str, '%Y-%m-%d %H:%M:%S') AS parsed_ts FROM t"; + + // Enable autoFallbackToDynamicFunctionMapping + FeatureBoard featureBoard = + ImmutableFeatureBoard.builder().autoFallbackToDynamicFunctionMapping(true).build(); + + // Perform roundtrip with multiple unmapped function calls + assertSqlSubstraitRelRoundTripLoosePojoComparison(query, createStatements, featureBoard); + } + + /** + * Test roundtrip with multiple calls to the same unmapped function. + * + *

This test verifies that an unmapped function can be called multiple times in the same query + * and all calls are properly converted and matched. This ensures the function matching logic + * works correctly even with repeated function calls. + */ + @Test + void testMultipleCallsToSameFunctionSqlRoundtrip() throws Exception { + String createStatements = "CREATE TABLE t (ts TIMESTAMP)"; + String query = + "SELECT strftime(ts, '%Y-%m-%d') AS formatted1, strftime(ts, '%H:%M:%S') AS formatted2 FROM t"; + + // Enable autoFallbackToDynamicFunctionMapping + FeatureBoard featureBoard = + ImmutableFeatureBoard.builder().autoFallbackToDynamicFunctionMapping(true).build(); + + // Perform roundtrip with multiple calls to the same function + assertSqlSubstraitRelRoundTripLoosePojoComparison(query, createStatements, featureBoard); + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index a37916bc4..37883af83 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -222,6 +222,27 @@ protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( query, catalogReader, ImmutableFeatureBoard.builder().build()); } + /** + * Overload of {@link #assertSqlSubstraitRelRoundTripLoosePojoComparison(String, + * Prepare.CatalogReader, FeatureBoard)} that accepts SQL CREATE statements as a string. + */ + protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( + String query, String createStatements, FeatureBoard featureBoard) throws Exception { + Prepare.CatalogReader catalogReader = + SubstraitCreateStatementParser.processCreateStatementsToCatalog(createStatements); + return assertSqlSubstraitRelRoundTripLoosePojoComparison(query, catalogReader, featureBoard); + } + + /** + * Convenience overload of {@link #assertSqlSubstraitRelRoundTripLoosePojoComparison(String, + * String, FeatureBoard)} with default FeatureBoard behavior (no dynamic UDFs). + */ + protected RelRoot assertSqlSubstraitRelRoundTripLoosePojoComparison( + String query, String createStatements) throws Exception { + return assertSqlSubstraitRelRoundTripLoosePojoComparison( + query, createStatements, ImmutableFeatureBoard.builder().build()); + } + @Beta protected void assertFullRoundTrip(String query) throws SqlParseException { assertFullRoundTrip(query, TPCH_CATALOG); @@ -393,6 +414,31 @@ protected void assertFullRoundTrip(Rel pojo1) { * */ protected void assertFullRoundTrip(Plan.Root pojo1) { + assertFullRoundTrip(pojo1, ImmutableFeatureBoard.builder().build()); + } + + /** + * Verifies that the given POJO can be converted with a specific FeatureBoard, with stability + * verification across multiple roundtrips. + * + *

Unlike the default {@link #assertFullRoundTrip(Plan.Root)}, this overload does NOT compare + * the initial POJO to the converted result, due to potential transformations during dynamic + * function mapping. Instead, it verifies roundtrip stability: that the second and third + * roundtrips produce identical results. + * + *

+ * + * This overload is useful for testing features like autoFallbackToDynamicFunctionMapping. + * + * @param pojo1 the Substrait plan root to test + * @param featureBoard the FeatureBoard configuration to use for conversions + */ + protected void assertFullRoundTrip(Plan.Root pojo1, FeatureBoard featureBoard) { ExtensionCollector extensionCollector = new ExtensionCollector(); // Substrait POJO 1 -> Substrait Proto @@ -402,17 +448,28 @@ protected void assertFullRoundTrip(Plan.Root pojo1) { io.substrait.plan.Plan.Root pojo2 = new ProtoRelConverter(extensionCollector, extensions).from(proto); - // Verify that POJOs are the same + // Verify that initial POJOs are the same assertEquals(pojo1, pojo2); - // Substrait POJO 2 -> Calcite - RelRoot calcite = new SubstraitToCalcite(extensions, typeFactory).convert(pojo2); + // Substrait POJO 2 -> Calcite with FeatureBoard (first roundtrip) + SubstraitToCalcite substraitToCalcite = + new SubstraitToCalcite(extensions, typeFactory, TypeConverter.DEFAULT, null, featureBoard); + RelRoot calcite = substraitToCalcite.convert(pojo2); - // Calcite -> Substrait POJO 3 - io.substrait.plan.Plan.Root pojo3 = SubstraitRelVisitor.convert(calcite, extensions); + // Calcite -> Substrait POJO 3 with FeatureBoard + io.substrait.plan.Plan.Root pojo3 = + SubstraitRelVisitor.convert(calcite, extensions, featureBoard); - // Verify that POJOs are the same - assertEquals(pojo1, pojo3); + // Note: pojo2 and pojo3 may differ due to dynamic function mapping and optimization + // changes, so we do NOT compare them directly. + + // Second roundtrip for stability verification + RelRoot calcite2 = substraitToCalcite.convert(pojo3); + io.substrait.plan.Plan.Root pojo4 = + SubstraitRelVisitor.convert(calcite2, extensions, featureBoard); + + // Verify stability: pojo3 should equal pojo4 (second and third roundtrips are stable) + assertEquals(pojo3, pojo4); } protected void assertRowMatch(RelDataType actual, Type... expected) {