Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions isthmus/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ dependencies {
testImplementation(platform(libs.junit.bom))
testImplementation(libs.junit.jupiter)
testRuntimeOnly(libs.junit.platform.launcher)
testRuntimeOnly(libs.slf4j.jdk14)
implementation(libs.guava)
implementation(libs.protobuf.java.util) {
exclude("com.google.guava", "guava")
Expand Down
18 changes: 18 additions & 0 deletions isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,22 @@ public Casing unquotedCasing() {
public boolean allowDynamicUdfs() {
return false;
}

/**
* Controls whether extension functions without an explicit entry in {@code FunctionMappings} are
* automatically mapped using {@code SimpleExtensionToSqlOperator}.
*
* <p>When enabled, scalar, aggregate and window functions from extension YAML files that are not
* explicitly mapped in {@code FunctionMappings} will be automatically mapped to Calcite {@code
* SqlOperator}s. This allows custom and dynamic functions to be used in SQL queries without
* manual mapping configuration.
*
* <p>This feature is disabled by default for backward compatibility.
*
* @return {@code true} if automatic fallback to dynamic function mapping should be enabled;
* {@code false} otherwise (default)
*/
@Value.Default
public boolean autoFallbackToDynamicFunctionMapping() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,45 @@ public static List<SqlOperator> from(
SimpleExtension.ExtensionCollection collection,
RelDataTypeFactory typeFactory,
TypeConverter typeConverter) {
// TODO: add support for windows functions
return Stream.concat(
collection.scalarFunctions().stream(), collection.aggregateFunctions().stream())
Stream.concat(
collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()),
collection.windowFunctions().stream())
.map(function -> toSqlFunction(function, typeFactory, typeConverter))
.collect(Collectors.toList());
}

/**
 * Builds Calcite operators for the given functions using the default type converter.
 *
 * <p>Convenience overload that delegates to the three-argument variant with {@link
 * TypeConverter#DEFAULT}. Scalar, aggregate and window function variants are all accepted.
 *
 * @param functions the function variants to convert
 * @param typeFactory the Calcite type factory used while building the operators
 * @return one {@link SqlOperator} per element of {@code functions}
 */
public static List<SqlOperator> from(
    List<? extends SimpleExtension.Function> functions, RelDataTypeFactory typeFactory) {
  final TypeConverter defaultConverter = TypeConverter.DEFAULT;
  return from(functions, typeFactory, defaultConverter);
}

/**
 * Builds one Calcite operator for every supplied function variant (scalar, aggregate or window).
 *
 * <p>No deduplication is performed: variants that share a base name but differ in their type
 * signature (e.g., strftime:ts_str, strftime:ts_string) each yield their own operator, and all
 * of them end up in the returned list. The intent is that Calcite resolves a call by checking
 * its arguments against every candidate operator and picking the one that fits.
 *
 * @param functions the function variants to convert
 * @param typeFactory the Calcite type factory used while building the operators
 * @param typeConverter the converter between Substrait and Calcite types
 * @return the operators, in the same order as {@code functions}
 */
public static List<SqlOperator> from(
    List<? extends SimpleExtension.Function> functions,
    RelDataTypeFactory typeFactory,
    TypeConverter typeConverter) {
  List<SqlOperator> operators = new java.util.ArrayList<>(functions.size());
  for (SimpleExtension.Function variant : functions) {
    operators.add(toSqlFunction(variant, typeFactory, typeConverter));
  }
  return operators;
}
Expand Down
42 changes: 37 additions & 5 deletions isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,52 @@ public SqlToSubstrait(FeatureBoard features) {
public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) {
  super(features, extensions);

  // Collects every operator generated from extension functions. Both feature flags below
  // contribute to this list, so neither may return early or assign the operator table itself.
  List<SqlOperator> dynamicOperators = new java.util.ArrayList<>();

  if (featureBoard.allowDynamicUdfs()) {
    SimpleExtension.ExtensionCollection dynamicExtensionCollection =
        ExtensionUtils.getDynamicExtensions(extensions);
    if (!dynamicExtensionCollection.scalarFunctions().isEmpty()
        || !dynamicExtensionCollection.aggregateFunctions().isEmpty()) {
      dynamicOperators.addAll(
          SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory));
    }
  }

  if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
    // Functions that FunctionConverter reports as having no entry in the static signature lists.
    List<SimpleExtension.ScalarFunctionVariant> unmappedScalars =
        io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
            extensions.scalarFunctions(),
            io.substrait.isthmus.expression.FunctionMappings.SCALAR_SIGS);
    List<SimpleExtension.AggregateFunctionVariant> unmappedAggregates =
        io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
            extensions.aggregateFunctions(),
            io.substrait.isthmus.expression.FunctionMappings.AGGREGATE_SIGS);
    List<SimpleExtension.WindowFunctionVariant> unmappedWindows =
        io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
            extensions.windowFunctions(),
            io.substrait.isthmus.expression.FunctionMappings.WINDOW_SIGS);

    if (!unmappedScalars.isEmpty()) {
      dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedScalars, this.factory));
    }
    if (!unmappedAggregates.isEmpty()) {
      dynamicOperators.addAll(
          SimpleExtensionToSqlOperator.from(unmappedAggregates, this.factory));
    }
    if (!unmappedWindows.isEmpty()) {
      dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedWindows, this.factory));
    }
  }

  // Assign the operator table exactly once: the built-in table alone, or the built-in table
  // chained with the generated operators when there are any.
  if (dynamicOperators.isEmpty()) {
    this.operatorTable = SubstraitOperatorTable.INSTANCE;
  } else {
    this.operatorTable =
        SqlOperatorTables.chain(
            SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(dynamicOperators));
  }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* RelVisitor to convert Substrait Rel plan to Calcite RelNode plan. Unsupported Rel node will call
Expand All @@ -95,6 +97,8 @@
public class SubstraitRelNodeConverter
extends AbstractRelVisitor<RelNode, SubstraitRelNodeConverter.Context, RuntimeException> {

private static final Logger LOGGER = LoggerFactory.getLogger(SubstraitRelNodeConverter.class);

protected final RelDataTypeFactory typeFactory;

protected final ScalarFunctionConverter scalarFunctionConverter;
Expand All @@ -120,9 +124,9 @@ public SubstraitRelNodeConverter(
this(
typeFactory,
relBuilder,
createScalarFunctionConverter(extensions, typeFactory, featureBoard.allowDynamicUdfs()),
new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory),
new WindowFunctionConverter(extensions.windowFunctions(), typeFactory),
createScalarFunctionConverter(extensions, typeFactory, featureBoard),
createAggregateFunctionConverter(extensions, typeFactory, featureBoard),
createWindowFunctionConverter(extensions, typeFactory, featureBoard),
TypeConverter.DEFAULT);
}

Expand Down Expand Up @@ -165,11 +169,11 @@ public SubstraitRelNodeConverter(
private static ScalarFunctionConverter createScalarFunctionConverter(
SimpleExtension.ExtensionCollection extensions,
RelDataTypeFactory typeFactory,
boolean allowDynamicUdfs) {
FeatureBoard featureBoard) {

List<FunctionMappings.Sig> additionalSignatures;
List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();

if (allowDynamicUdfs) {
if (featureBoard.allowDynamicUdfs()) {
java.util.Set<String> knownFunctionNames =
FunctionMappings.SCALAR_SIGS.stream()
.map(FunctionMappings.Sig::name)
Expand All @@ -180,28 +184,124 @@ private static ScalarFunctionConverter createScalarFunctionConverter(
.filter(f -> !knownFunctionNames.contains(f.name().toLowerCase()))
.collect(Collectors.toList());

if (dynamicFunctions.isEmpty()) {
additionalSignatures = Collections.emptyList();
} else {
if (!dynamicFunctions.isEmpty()) {
SimpleExtension.ExtensionCollection dynamicExtensionCollection =
SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build();

List<SqlOperator> dynamicOperators =
SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory);

additionalSignatures =
additionalSignatures.addAll(
dynamicOperators.stream()
.map(op -> FunctionMappings.s(op, op.getName()))
.collect(Collectors.toList());
.collect(Collectors.toList()));
}
}

if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
List<SimpleExtension.ScalarFunctionVariant> unmappedFunctions =
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
extensions.scalarFunctions(), FunctionMappings.SCALAR_SIGS);

if (!unmappedFunctions.isEmpty()) {
LOGGER.info(
"Dynamically mapping {} unmapped scalar functions: {}",
unmappedFunctions.size(),
unmappedFunctions.stream().map(f -> f.name()).collect(Collectors.toList()));

List<SqlOperator> dynamicOperators =
SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory);

// Note: We use last-wins deduplication here because:
// 1. Multiple variants of the same function create separate SqlOperator instances
// 2. Calcite's SqlOperator equality is based on name and kind, not identity
// 3. RexCalls may use any one of these equivalent operators
// 4. We only need ONE SqlOperator registered per function name as a key in signatures map
// 5. The FunctionFinder will match all variants based on type signatures
java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
for (SqlOperator op : dynamicOperators) {
operatorsByName.put(op.getName().toLowerCase(), op);
}

additionalSignatures.addAll(
operatorsByName.values().stream()
.map(op -> FunctionMappings.s(op, op.getName().toLowerCase()))
.collect(Collectors.toList()));
}
} else {
additionalSignatures = Collections.emptyList();
}

return new ScalarFunctionConverter(
extensions.scalarFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
}

/**
 * Creates the {@link AggregateFunctionConverter}, optionally extended with dynamically generated
 * signatures.
 *
 * <p>When {@link FeatureBoard#autoFallbackToDynamicFunctionMapping()} is enabled, the aggregate
 * functions that {@code FunctionConverter.getUnmappedFunctions} returns for {@link
 * FunctionMappings#AGGREGATE_SIGS} are converted to {@link SqlOperator}s and registered as
 * additional signatures, one per lower-cased function name.
 *
 * @param extensions the extension collection supplying the aggregate functions
 * @param typeFactory the Calcite type factory
 * @param featureBoard the feature flags controlling dynamic mapping
 * @return the configured aggregate function converter
 */
private static AggregateFunctionConverter createAggregateFunctionConverter(
    SimpleExtension.ExtensionCollection extensions,
    RelDataTypeFactory typeFactory,
    FeatureBoard featureBoard) {

  List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();

  if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
    List<SimpleExtension.AggregateFunctionVariant> unmappedFunctions =
        io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
            extensions.aggregateFunctions(), FunctionMappings.AGGREGATE_SIGS);

    // Each variant yields its own SqlOperator; keep a single operator per name. A repeated
    // put() replaces the value (last wins) while the key keeps its original position.
    // Locale.ROOT keeps the key independent of the JVM default locale.
    java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
    for (SqlOperator op : SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory)) {
      operatorsByName.put(op.getName().toLowerCase(java.util.Locale.ROOT), op);
    }

    // The map key already is the lower-cased operator name used for the signature.
    operatorsByName.forEach(
        (name, op) -> additionalSignatures.add(FunctionMappings.s(op, name)));
  }

  return new AggregateFunctionConverter(
      extensions.aggregateFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
}

/**
 * Creates the {@link WindowFunctionConverter}, optionally extended with dynamically generated
 * signatures.
 *
 * <p>When {@link FeatureBoard#autoFallbackToDynamicFunctionMapping()} is enabled, the window
 * functions that {@code FunctionConverter.getUnmappedFunctions} returns for {@link
 * FunctionMappings#WINDOW_SIGS} are converted to {@link SqlOperator}s and registered as
 * additional signatures, one per lower-cased function name.
 *
 * @param extensions the extension collection supplying the window functions
 * @param typeFactory the Calcite type factory
 * @param featureBoard the feature flags controlling dynamic mapping
 * @return the configured window function converter
 */
private static WindowFunctionConverter createWindowFunctionConverter(
    SimpleExtension.ExtensionCollection extensions,
    RelDataTypeFactory typeFactory,
    FeatureBoard featureBoard) {

  List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();

  if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
    List<SimpleExtension.WindowFunctionVariant> unmappedFunctions =
        io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
            extensions.windowFunctions(), FunctionMappings.WINDOW_SIGS);

    // Each variant yields its own SqlOperator; keep a single operator per name. A repeated
    // put() replaces the value (last wins) while the key keeps its original position.
    // Locale.ROOT keeps the key independent of the JVM default locale.
    java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
    for (SqlOperator op : SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory)) {
      operatorsByName.put(op.getName().toLowerCase(java.util.Locale.ROOT), op);
    }

    // The map key already is the lower-cased operator name used for the signature.
    operatorsByName.forEach(
        (name, op) -> additionalSignatures.add(FunctionMappings.s(op, name)));
  }

  return new WindowFunctionConverter(
      extensions.windowFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
}

public static RelNode convert(
Rel relRoot,
RelOptCluster relOptCluster,
Expand Down
Loading