From af07d424664c859c8aebf735f036ca4ffefb7ef4 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 29 Jan 2026 11:29:23 -0800 Subject: [PATCH 01/67] feat(metrics): Add MetricsPersistence SPI for backend-agnostic metrics storage This commit introduces a Service Provider Interface (SPI) for persisting Iceberg metrics reports, addressing the extensibility concerns raised in the design review. Key components: - MetricsPersistence: Main SPI interface with write and query operations - NoOpMetricsPersistence: Default do-nothing implementation for backends that don't support metrics persistence - ScanMetricsRecord: Immutable interface for scan metrics data - CommitMetricsRecord: Immutable interface for commit metrics data - MetricsQueryCriteria: Query parameters with filtering and pagination - MetricsContext: Context for conversion (realm, catalog, principal info) - MetricsPersistenceFactory: Factory for realm-scoped instances - MetricsRecordConverter: Converts Iceberg reports to SPI records Design principles: - Backend-agnostic: Can be implemented by JDBC, NoSQL, or custom backends - No instanceof checks: Service code calls interface methods directly - Idempotent writes: Same reportId written twice has no effect - Graceful degradation: Unsupported backends return empty results Relates to: #3337 --- .../metrics/CommitMetricsRecord.java | 165 +++++++++++++++ .../persistence/metrics/MetricsContext.java | 65 ++++++ .../metrics/MetricsPersistence.java | 103 ++++++++++ .../metrics/MetricsPersistenceFactory.java | 41 ++++ .../metrics/MetricsQueryCriteria.java | 139 +++++++++++++ .../metrics/MetricsRecordConverter.java | 188 ++++++++++++++++++ .../metrics/NoOpMetricsPersistence.java | 63 ++++++ .../metrics/ScanMetricsRecord.java | 164 +++++++++++++++ 8 files changed, 928 insertions(+) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java create mode 100644 
polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java new file mode 100644 index 0000000000..25a1044a82 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Backend-agnostic representation of an Iceberg commit metrics report. + * + *
<p>
This record captures all relevant metrics from an Iceberg {@code CommitReport} along with + * contextual information such as realm, catalog, and request correlation data. + */ +@PolarisImmutable +public interface CommitMetricsRecord { + + // === Identification === + + /** Unique identifier for this report (UUID). */ + String reportId(); + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Table name. */ + String tableName(); + + // === Timing === + + /** Timestamp when the report was received. */ + Instant timestamp(); + + // === Request Context === + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** Trace ID from the report itself (may differ from OTel trace). */ + Optional reportTraceId(); + + // === Commit Context === + + /** Snapshot ID created by this commit. */ + long snapshotId(); + + /** Sequence number of the snapshot. */ + Optional sequenceNumber(); + + /** Operation type (e.g., "append", "overwrite", "delete"). */ + String operation(); + + // === File Metrics - Data Files === + + /** Number of data files added. */ + long addedDataFiles(); + + /** Number of data files removed. */ + long removedDataFiles(); + + /** Total number of data files after commit. */ + long totalDataFiles(); + + // === File Metrics - Delete Files === + + /** Number of delete files added. */ + long addedDeleteFiles(); + + /** Number of delete files removed. */ + long removedDeleteFiles(); + + /** Total number of delete files after commit. 
*/ + long totalDeleteFiles(); + + /** Number of equality delete files added. */ + long addedEqualityDeleteFiles(); + + /** Number of equality delete files removed. */ + long removedEqualityDeleteFiles(); + + /** Number of positional delete files added. */ + long addedPositionalDeleteFiles(); + + /** Number of positional delete files removed. */ + long removedPositionalDeleteFiles(); + + // === Record Metrics === + + /** Number of records added. */ + long addedRecords(); + + /** Number of records removed. */ + long removedRecords(); + + /** Total number of records after commit. */ + long totalRecords(); + + // === Size Metrics === + + /** Size of added files in bytes. */ + long addedFileSizeBytes(); + + /** Size of removed files in bytes. */ + long removedFileSizeBytes(); + + /** Total file size in bytes after commit. */ + long totalFileSizeBytes(); + + // === Timing === + + /** Total duration of the commit in milliseconds. */ + Optional totalDurationMs(); + + /** Number of commit attempts. */ + int attempts(); + + // === Extensibility === + + /** Additional metadata as key-value pairs. */ + Map metadata(); + + /** + * Creates a new builder for CommitMetricsRecord. + * + * @return a new builder instance + */ + static ImmutableCommitMetricsRecord.Builder builder() { + return ImmutableCommitMetricsRecord.builder(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java new file mode 100644 index 0000000000..d63800f82f --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Context information needed when converting Iceberg metrics reports to persistence records. + * + *
<p>
This context captures information from the request environment that is not available in the + * Iceberg report itself, such as realm, catalog, principal, and tracing information. + */ +@PolarisImmutable +public interface MetricsContext { + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** + * Creates a new builder for MetricsContext. + * + * @return a new builder instance + */ + static ImmutableMetricsContext.Builder builder() { + return ImmutableMetricsContext.builder(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java new file mode 100644 index 0000000000..f13058933a --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import jakarta.annotation.Nonnull; +import java.util.List; + +/** + * Service Provider Interface (SPI) for persisting Iceberg metrics reports. + * + *
<p>
This interface enables different persistence backends (JDBC, NoSQL, custom) to implement + * metrics storage in a way appropriate for their storage model, while allowing service code to + * remain backend-agnostic. + * + *
<p>
Implementations should be idempotent - writing the same reportId twice should have no effect. + * Implementations that don't support metrics persistence should return {@link #NOOP}. + */ +public interface MetricsPersistence { + + /** A no-op implementation for backends that don't support metrics persistence. */ + MetricsPersistence NOOP = new NoOpMetricsPersistence(); + + // ============================================================================ + // Capability Detection + // ============================================================================ + + /** + * Returns whether this persistence backend supports metrics storage. + * + *
<p>
Backends that do not support metrics should return false. Service code should NOT use this + * to branch with instanceof checks - instead, call the interface methods directly and rely on the + * no-op behavior for unsupported backends. + * + * @return true if metrics persistence is supported, false otherwise + */ + boolean isSupported(); + + // ============================================================================ + // Write Operations + // ============================================================================ + + /** + * Persists a scan metrics record. + * + *
<p>
This operation is idempotent - writing the same reportId twice has no effect. If {@link + * #isSupported()} returns false, this is a no-op. + * + * @param record the scan metrics record to persist + */ + void writeScanReport(@Nonnull ScanMetricsRecord record); + + /** + * Persists a commit metrics record. + * + *
<p>
This operation is idempotent - writing the same reportId twice has no effect. If {@link + * #isSupported()} returns false, this is a no-op. + * + * @param record the commit metrics record to persist + */ + void writeCommitReport(@Nonnull CommitMetricsRecord record); + + // ============================================================================ + // Query Operations + // ============================================================================ + + /** + * Queries scan metrics reports based on the specified criteria. + * + *
<p>
Returns an empty list if {@link #isSupported()} returns false. + * + * @param criteria the query criteria + * @return list of matching scan metrics records, or empty list if not supported + */ + @Nonnull + List queryScanReports(@Nonnull MetricsQueryCriteria criteria); + + /** + * Queries commit metrics reports based on the specified criteria. + * + *
<p>
Returns an empty list if {@link #isSupported()} returns false. + * + * @param criteria the query criteria + * @return list of matching commit metrics records, or empty list if not supported + */ + @Nonnull + List queryCommitReports(@Nonnull MetricsQueryCriteria criteria); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java new file mode 100644 index 0000000000..90a56e1ec3 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import org.apache.polaris.core.context.RealmContext; + +/** + * Factory interface for creating {@link MetricsPersistence} instances. + * + *
<p>
Implementations may cache instances per realm for efficiency. For backends that don't support + * metrics persistence, implementations should return {@link MetricsPersistence#NOOP}. + */ +public interface MetricsPersistenceFactory { + + /** + * Gets or creates a {@link MetricsPersistence} instance for the given realm. + * + *
<p>
Implementations may cache instances per realm. If the persistence backend does not support + * metrics persistence, this method should return {@link MetricsPersistence#NOOP}. + * + * @param realmContext the realm context + * @return a MetricsPersistence instance for the realm, or NOOP if not supported + */ + MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java new file mode 100644 index 0000000000..0336351079 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; + +/** + * Query criteria for retrieving metrics reports. + * + *
<p>
This class defines the parameters that can be used to filter and paginate metrics query + * results. Not all backends may support all query patterns - check the implementation documentation + * for supported query patterns and required indexes. + * + *
<p><b>Supported Query Patterns</b>
+ *
+ * <table>
+ *   <caption>Supported query patterns</caption>
+ *   <tr><th>Pattern</th><th>Fields Used</th><th>Index Required</th></tr>
+ *   <tr><td>By Table + Time</td><td>catalogName, namespace, tableName, startTime, endTime</td><td>Yes (OSS)</td></tr>
+ *   <tr><td>By Trace ID</td><td>otelTraceId</td><td>Yes (OSS)</td></tr>
+ *   <tr><td>By Principal</td><td>principalName</td><td>No (custom deployment)</td></tr>
+ *   <tr><td>By Time Only</td><td>startTime, endTime</td><td>Partial (timestamp index)</td></tr>
+ * </table>
+ */ +@PolarisImmutable +public interface MetricsQueryCriteria { + + // === Table Identification (optional) === + + /** Catalog name to filter by. */ + Optional catalogName(); + + /** Namespace to filter by (dot-separated). */ + Optional namespace(); + + /** Table name to filter by. */ + Optional tableName(); + + // === Time Range === + + /** Start time for the query (inclusive). */ + Optional startTime(); + + /** End time for the query (exclusive). */ + Optional endTime(); + + // === Correlation === + + /** OpenTelemetry trace ID to filter by. */ + Optional otelTraceId(); + + /** + * Principal name to filter by. + * + *
<p>
Note: This query pattern may require a custom index in deployment environments. The OSS + * codebase does not include an index for principal-based queries. + */ + Optional principalName(); + + // === Pagination === + + /** Maximum number of results to return. Defaults to 100. */ + @Value.Default + default int limit() { + return 100; + } + + /** Number of results to skip. Defaults to 0. */ + @Value.Default + default int offset() { + return 0; + } + + /** + * Creates a new builder for MetricsQueryCriteria. + * + * @return a new builder instance + */ + static ImmutableMetricsQueryCriteria.Builder builder() { + return ImmutableMetricsQueryCriteria.builder(); + } + + /** + * Creates criteria for querying by table and time range. + * + * @param catalogName the catalog name + * @param namespace the namespace (dot-separated) + * @param tableName the table name + * @param startTime the start time (inclusive) + * @param endTime the end time (exclusive) + * @param limit maximum number of results + * @return the query criteria + */ + static MetricsQueryCriteria forTable( + String catalogName, + String namespace, + String tableName, + Instant startTime, + Instant endTime, + int limit) { + return builder() + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .startTime(startTime) + .endTime(endTime) + .limit(limit) + .build(); + } + + /** + * Creates criteria for querying by OpenTelemetry trace ID. 
+ * + * @param traceId the trace ID to search for + * @param limit maximum number of results + * @return the query criteria + */ + static MetricsQueryCriteria forTraceId(String traceId, int limit) { + return builder().otelTraceId(traceId).limit(limit).build(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java new file mode 100644 index 0000000000..f790e09658 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Collections; +import java.util.Optional; +import java.util.UUID; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.CounterResult; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.metrics.TimerResult; + +/** + * Utility class for converting Iceberg metrics reports to SPI record types. + * + *
<p>
This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link + * CommitReport} and combines them with context information to create persistence-ready records. + */ +public final class MetricsRecordConverter { + + private MetricsRecordConverter() { + // Utility class + } + + /** + * Converts an Iceberg ScanReport to a ScanMetricsRecord. + * + * @param scanReport the Iceberg scan report + * @param tableName the table name + * @param context the metrics context containing realm, catalog, and request information + * @return the scan metrics record ready for persistence + */ + public static ScanMetricsRecord fromScanReport( + ScanReport scanReport, String tableName, MetricsContext context) { + ScanMetricsResult metrics = scanReport.scanMetrics(); + + return ScanMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .realmId(context.realmId()) + .catalogId(context.catalogId()) + .catalogName(context.catalogName()) + .namespace(context.namespace()) + .tableName(tableName) + .timestamp(Instant.now()) + .principalName(context.principalName()) + .requestId(context.requestId()) + .otelTraceId(context.otelTraceId()) + .otelSpanId(context.otelSpanId()) + .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) + .snapshotId(Optional.of(scanReport.snapshotId())) + .schemaId(Optional.of(scanReport.schemaId())) + .filterExpression( + scanReport.filter() != null + ? Optional.of(scanReport.filter().toString()) + : Optional.empty()) + .projectedFieldIds( + scanReport.projectedFieldIds() != null + ? scanReport.projectedFieldIds() + : Collections.emptyList()) + .projectedFieldNames( + scanReport.projectedFieldNames() != null + ? 
scanReport.projectedFieldNames() + : Collections.emptyList()) + .resultDataFiles(getCounterValue(metrics.resultDataFiles())) + .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) + .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) + .totalDataManifests(getCounterValue(metrics.totalDataManifests())) + .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) + .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) + .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) + .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) + .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) + .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) + .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) + .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) + .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) + .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) + .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) + .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) + .metadata(Collections.emptyMap()) + .build(); + } + + /** + * Converts an Iceberg CommitReport to a CommitMetricsRecord. 
+ * + * @param commitReport the Iceberg commit report + * @param tableName the table name + * @param context the metrics context containing realm, catalog, and request information + * @return the commit metrics record ready for persistence + */ + public static CommitMetricsRecord fromCommitReport( + CommitReport commitReport, String tableName, MetricsContext context) { + CommitMetricsResult metrics = commitReport.commitMetrics(); + + return CommitMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .realmId(context.realmId()) + .catalogId(context.catalogId()) + .catalogName(context.catalogName()) + .namespace(context.namespace()) + .tableName(tableName) + .timestamp(Instant.now()) + .principalName(context.principalName()) + .requestId(context.requestId()) + .otelTraceId(context.otelTraceId()) + .otelSpanId(context.otelSpanId()) + .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) + .snapshotId(commitReport.snapshotId()) + .sequenceNumber(Optional.of(commitReport.sequenceNumber())) + .operation(commitReport.operation()) + .addedDataFiles(getCounterValue(metrics.addedDataFiles())) + .removedDataFiles(getCounterValue(metrics.removedDataFiles())) + .totalDataFiles(getCounterValue(metrics.totalDataFiles())) + .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) + .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) + .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) + .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) + .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) + .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) + .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) + .addedRecords(getCounterValue(metrics.addedRecords())) + .removedRecords(getCounterValue(metrics.removedRecords())) + .totalRecords(getCounterValue(metrics.totalRecords())) + 
.addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) + .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) + .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) + .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) + .attempts(getCounterValueInt(metrics.attempts())) + .metadata(Collections.emptyMap()) + .build(); + } + + private static long getCounterValue(CounterResult counter) { + if (counter == null) { + return 0L; + } + return counter.value(); + } + + private static int getCounterValueInt(CounterResult counter) { + if (counter == null) { + return 0; + } + return (int) counter.value(); + } + + private static long getTimerValueMs(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return 0L; + } + return timer.totalDuration().toMillis(); + } + + private static Optional getTimerValueMsOpt(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return Optional.empty(); + } + return Optional.of(timer.totalDuration().toMillis()); + } + + private static Optional getMetadataValue( + java.util.Map metadata, String key) { + if (metadata == null) { + return Optional.empty(); + } + return Optional.ofNullable(metadata.get(key)); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java new file mode 100644 index 0000000000..cd41e82573 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import jakarta.annotation.Nonnull; +import java.util.Collections; +import java.util.List; + +/** + * A no-op implementation of {@link MetricsPersistence} for backends that don't support metrics + * persistence. + * + *
<p>
This implementation is used as the default when a persistence backend does not support metrics + * storage. All write operations are silently ignored, and all query operations return empty + * results. + */ +final class NoOpMetricsPersistence implements MetricsPersistence { + + NoOpMetricsPersistence() {} + + @Override + public boolean isSupported() { + return false; + } + + @Override + public void writeScanReport(@Nonnull ScanMetricsRecord record) { + // No-op + } + + @Override + public void writeCommitReport(@Nonnull CommitMetricsRecord record) { + // No-op + } + + @Nonnull + @Override + public List queryScanReports(@Nonnull MetricsQueryCriteria criteria) { + return Collections.emptyList(); + } + + @Nonnull + @Override + public List queryCommitReports(@Nonnull MetricsQueryCriteria criteria) { + return Collections.emptyList(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java new file mode 100644 index 0000000000..f268e3bc16 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Backend-agnostic representation of an Iceberg scan metrics report. + * + *

This record captures all relevant metrics from an Iceberg {@code ScanReport} along with + * contextual information such as realm, catalog, and request correlation data. + */ +@PolarisImmutable +public interface ScanMetricsRecord { + + // === Identification === + + /** Unique identifier for this report (UUID). */ + String reportId(); + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Table name. */ + String tableName(); + + // === Timing === + + /** Timestamp when the report was received. */ + Instant timestamp(); + + // === Request Context === + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** Trace ID from the report itself (may differ from OTel trace). */ + Optional reportTraceId(); + + // === Scan Context === + + /** Snapshot ID that was scanned. */ + Optional snapshotId(); + + /** Schema ID used for the scan. */ + Optional schemaId(); + + /** Filter expression applied to the scan (as string). */ + Optional filterExpression(); + + /** List of projected field IDs. */ + List projectedFieldIds(); + + /** List of projected field names. */ + List projectedFieldNames(); + + // === Scan Metrics - File Counts === + + /** Number of data files in the result. */ + long resultDataFiles(); + + /** Number of delete files in the result. */ + long resultDeleteFiles(); + + /** Total size of files in bytes. */ + long totalFileSizeBytes(); + + // === Scan Metrics - Manifest Counts === + + /** Total number of data manifests. 
*/ + long totalDataManifests(); + + /** Total number of delete manifests. */ + long totalDeleteManifests(); + + /** Number of data manifests that were scanned. */ + long scannedDataManifests(); + + /** Number of delete manifests that were scanned. */ + long scannedDeleteManifests(); + + /** Number of data manifests that were skipped. */ + long skippedDataManifests(); + + /** Number of delete manifests that were skipped. */ + long skippedDeleteManifests(); + + /** Number of data files that were skipped. */ + long skippedDataFiles(); + + /** Number of delete files that were skipped. */ + long skippedDeleteFiles(); + + // === Scan Metrics - Timing === + + /** Total planning duration in milliseconds. */ + long totalPlanningDurationMs(); + + // === Scan Metrics - Delete Files === + + /** Number of equality delete files. */ + long equalityDeleteFiles(); + + /** Number of positional delete files. */ + long positionalDeleteFiles(); + + /** Number of indexed delete files. */ + long indexedDeleteFiles(); + + /** Total size of delete files in bytes. */ + long totalDeleteFileSizeBytes(); + + // === Extensibility === + + /** Additional metadata as key-value pairs. */ + Map metadata(); + + /** + * Creates a new builder for ScanMetricsRecord. 
+ * + * @return a new builder instance + */ + static ImmutableScanMetricsRecord.Builder builder() { + return ImmutableScanMetricsRecord.builder(); + } +} From bceb1360d1b052715523198e038a373f83cc0f6e Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 29 Jan 2026 13:05:30 -0800 Subject: [PATCH 02/67] refactor(metrics): Remove ambient context fields from SPI records Remove fields that can be obtained from ambient request context at write time: - principalName: Available from SecurityContext/PolarisPrincipal - requestId: Not well-defined in Polaris; unclear what request it refers to - otelTraceId/otelSpanId: Available from OTel context via Span.current() Keep reportTraceId as it's a client-provided value from the report metadata that cannot be obtained from the ambient context. Rename otelTraceId filter in MetricsQueryCriteria to reportTraceId to match the field that is actually stored in the records. This keeps the SPI focused on business data (the metrics themselves) rather than infrastructure concerns (tracing, authentication) which the persistence implementation can obtain from the ambient context at write time if needed. 
--- .../metrics/CommitMetricsRecord.java | 26 +++++------ .../persistence/metrics/MetricsContext.java | 19 ++------ .../metrics/MetricsQueryCriteria.java | 46 +++++++++++++------ .../metrics/MetricsRecordConverter.java | 8 ---- .../metrics/ScanMetricsRecord.java | 26 +++++------ 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 25a1044a82..9d41db8173 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -57,21 +57,19 @@ public interface CommitMetricsRecord { /** Timestamp when the report was received. */ Instant timestamp(); - // === Request Context === + // === Client Correlation === - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - - /** Trace ID from the report itself (may differ from OTel trace). */ + /** + * Client-provided trace ID from the metrics report metadata. + * + *

This is an optional identifier that the Iceberg client may include in the report's metadata + * map (typically under the key "trace-id"). It allows clients to correlate this metrics report + * with their own distributed tracing system or query execution context. + * + *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal + * information are not included in this record. The persistence implementation can obtain these + * from the ambient request context (OTel context, security context) at write time if needed. + */ Optional reportTraceId(); // === Commit Context === diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java index d63800f82f..38432c12c2 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java @@ -18,14 +18,17 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; /** * Context information needed when converting Iceberg metrics reports to persistence records. * *

This context captures information from the request environment that is not available in the - * Iceberg report itself, such as realm, catalog, principal, and tracing information. + * Iceberg report itself, such as realm and catalog identification. + * + *

Note: Principal and tracing information (e.g., OpenTelemetry trace/span IDs) are not included + * in this context. The persistence implementation can obtain these from the ambient request context + * (OTel context, security context) at write time if needed. */ @PolarisImmutable public interface MetricsContext { @@ -42,18 +45,6 @@ public interface MetricsContext { /** Dot-separated namespace path (e.g., "db.schema"). */ String namespace(); - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - /** * Creates a new builder for MetricsContext. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 0336351079..aa844b1424 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -35,8 +35,7 @@ * * * - * - * + * * *
PatternFields UsedIndex Required
By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
By Trace IDotelTraceIdYes (OSS)
By PrincipalprincipalNameNo (custom deployment)
By Client Trace IDreportTraceIdNo (custom deployment)
By Time OnlystartTime, endTimePartial (timestamp index)
*/ @@ -64,26 +63,45 @@ public interface MetricsQueryCriteria { // === Correlation === - /** OpenTelemetry trace ID to filter by. */ - Optional otelTraceId(); - /** - * Principal name to filter by. + * Client-provided trace ID to filter by (from report metadata). + * + *

This matches the {@code reportTraceId} field in the metrics records, which originates from + * the client's metadata map. Useful for correlating metrics with client-side query execution. * *

Note: This query pattern may require a custom index in deployment environments. The OSS - * codebase does not include an index for principal-based queries. + * codebase does not include an index for trace-based queries. */ - Optional principalName(); + Optional reportTraceId(); // === Pagination === - /** Maximum number of results to return. Defaults to 100. */ + /** + * Maximum number of results to return. + * + *

Defaults to 100. Used together with {@link #offset()} for offset-based pagination. + */ @Value.Default default int limit() { return 100; } - /** Number of results to skip. Defaults to 0. */ + /** + * Number of results to skip before returning results. + * + *

Defaults to 0. Used for offset-based pagination where: + * + *

    + *
  • Page 1: offset=0, limit=100 → returns results 1-100 + *
  • Page 2: offset=100, limit=100 → returns results 101-200 + *
  • Page N: offset=(N-1)*limit, limit=100 → returns results for page N + *
+ * + *

Note: Offset-based pagination can be inefficient for large offsets in some databases. For + * very large result sets (>10K records), consider using time-based filtering with {@link + * #startTime()} and {@link #endTime()} to narrow the result set instead of relying on large + * offsets. + */ @Value.Default default int offset() { return 0; @@ -127,13 +145,13 @@ static MetricsQueryCriteria forTable( } /** - * Creates criteria for querying by OpenTelemetry trace ID. + * Creates criteria for querying by client-provided trace ID. * - * @param traceId the trace ID to search for + * @param reportTraceId the client trace ID to search for * @param limit maximum number of results * @return the query criteria */ - static MetricsQueryCriteria forTraceId(String traceId, int limit) { - return builder().otelTraceId(traceId).limit(limit).build(); + static MetricsQueryCriteria forReportTraceId(String reportTraceId, int limit) { + return builder().reportTraceId(reportTraceId).limit(limit).build(); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java index f790e09658..0cb740b8cf 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java @@ -61,10 +61,6 @@ public static ScanMetricsRecord fromScanReport( .namespace(context.namespace()) .tableName(tableName) .timestamp(Instant.now()) - .principalName(context.principalName()) - .requestId(context.requestId()) - .otelTraceId(context.otelTraceId()) - .otelSpanId(context.otelSpanId()) .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -120,10 +116,6 @@ public static CommitMetricsRecord fromCommitReport( 
.namespace(context.namespace()) .tableName(tableName) .timestamp(Instant.now()) - .principalName(context.principalName()) - .requestId(context.requestId()) - .otelTraceId(context.otelTraceId()) - .otelSpanId(context.otelSpanId()) .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index f268e3bc16..5e85566c14 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -58,21 +58,19 @@ public interface ScanMetricsRecord { /** Timestamp when the report was received. */ Instant timestamp(); - // === Request Context === + // === Client Correlation === - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - - /** Trace ID from the report itself (may differ from OTel trace). */ + /** + * Client-provided trace ID from the metrics report metadata. + * + *

This is an optional identifier that the Iceberg client may include in the report's metadata + * map (typically under the key "trace-id"). It allows clients to correlate this metrics report + * with their own distributed tracing system or query execution context. + * + *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal + * information are not included in this record. The persistence implementation can obtain these + * from the ambient request context (OTel context, security context) at write time if needed. + */ Optional reportTraceId(); // === Scan Context === From 01d8e9383736d65055aa0d2174c3ce62a7e10aa1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Sat, 31 Jan 2026 05:50:35 -0800 Subject: [PATCH 03/67] refactor(metrics): Replace offset-based pagination with PageToken pattern - Create ReportIdToken for cursor-based pagination using report ID (UUID) - Remove limit() and offset() from MetricsQueryCriteria - Update MetricsPersistence to use PageToken parameter and return Page - Update NoOpMetricsPersistence to return empty Page objects - Register ReportIdToken via service loader This change makes the SPI truly backend-agnostic by using the existing Polaris PageToken pattern instead of RDBMS-specific offset pagination. Each backend can implement cursors in their optimal way (keyset for RDBMS, continuation tokens for NoSQL). Addresses reviewer feedback on MetricsQueryCriteria.offset() field. 
--- .../metrics/MetricsPersistence.java | 57 ++++++-- .../metrics/MetricsQueryCriteria.java | 90 ++++++------- .../metrics/NoOpMetricsPersistence.java | 16 ++- .../persistence/metrics/ReportIdToken.java | 122 ++++++++++++++++++ ...ore.persistence.pagination.Token$TokenType | 1 + 5 files changed, 223 insertions(+), 63 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index f13058933a..346cc77d01 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -19,7 +19,8 @@ package org.apache.polaris.core.persistence.metrics; import jakarta.annotation.Nonnull; -import java.util.List; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; /** * Service Provider Interface (SPI) for persisting Iceberg metrics reports. @@ -30,6 +31,24 @@ * *

Implementations should be idempotent - writing the same reportId twice should have no effect. * Implementations that don't support metrics persistence should return {@link #NOOP}. + * + *

Pagination

+ * + *

Query methods use the standard Polaris pagination pattern with {@link PageToken} for requests + * and {@link Page} for responses. This enables: + * + *

    + *
  • Backend-specific cursor implementations (RDBMS offset, NoSQL continuation tokens, etc.) + *
  • Consistent pagination interface across all Polaris persistence APIs + *
  • Efficient cursor-based pagination that works with large result sets + *
+ * + *

The {@link ReportIdToken} provides a cursor based on the report ID (UUID), but backends may + * use other cursor strategies internally. + * + * @see PageToken + * @see Page + * @see ReportIdToken */ public interface MetricsPersistence { @@ -82,22 +101,42 @@ public interface MetricsPersistence { /** * Queries scan metrics reports based on the specified criteria. * - *

Returns an empty list if {@link #isSupported()} returns false. + *

Returns an empty page if {@link #isSupported()} returns false. + * + *

Example usage: + * + *

{@code
+   * // First page
+   * PageToken pageToken = PageToken.fromLimit(100);
+   * Page page = persistence.queryScanReports(criteria, pageToken);
+   *
+   * // Next page (if available)
+   * String nextPageToken = page.encodedResponseToken();
+   * if (nextPageToken != null) {
+   *   pageToken = PageToken.build(nextPageToken, null, () -> true);
+   *   Page nextPage = persistence.queryScanReports(criteria, pageToken);
+   * }
+   * }
* - * @param criteria the query criteria - * @return list of matching scan metrics records, or empty list if not supported + * @param criteria the query criteria (filters) + * @param pageToken pagination parameters (page size and optional cursor) + * @return page of matching scan metrics records with continuation token if more results exist */ @Nonnull - List queryScanReports(@Nonnull MetricsQueryCriteria criteria); + Page queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken); /** * Queries commit metrics reports based on the specified criteria. * - *

Returns an empty list if {@link #isSupported()} returns false. + *

Returns an empty page if {@link #isSupported()} returns false. * - * @param criteria the query criteria - * @return list of matching commit metrics records, or empty list if not supported + * @param criteria the query criteria (filters) + * @param pageToken pagination parameters (page size and optional cursor) + * @return page of matching commit metrics records with continuation token if more results exist + * @see #queryScanReports(MetricsQueryCriteria, PageToken) for pagination example */ @Nonnull - List queryCommitReports(@Nonnull MetricsQueryCriteria criteria); + Page queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken); } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index aa844b1424..d1989c68d2 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -21,14 +21,19 @@ import java.time.Instant; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; -import org.immutables.value.Value; /** * Query criteria for retrieving metrics reports. * - *

This class defines the parameters that can be used to filter and paginate metrics query - * results. Not all backends may support all query patterns - check the implementation documentation - * for supported query patterns and required indexes. + *

This class defines the filter parameters for metrics queries. Pagination is handled separately + * via {@link org.apache.polaris.core.persistence.pagination.PageToken}, which is passed as a + * separate parameter to query methods. This separation of concerns allows: + * + *

    + *
  • Different backends to implement pagination in their optimal way + *
  • Cursor-based pagination that works with both RDBMS and NoSQL backends + *
  • Reuse of the existing Polaris pagination infrastructure + *
* *

Supported Query Patterns

* @@ -38,6 +43,23 @@ * By Client Trace IDreportTraceIdNo (custom deployment) * By Time OnlystartTime, endTimePartial (timestamp index) * + * + *

Pagination

+ * + *

Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} + * passed to query methods. The token contains: + * + *

    + *
  • {@code pageSize()} - Maximum number of results to return + *
  • {@code value()} - Optional cursor token (e.g., {@link ReportIdToken}) for continuation + *
+ * + *

Query results are returned as {@link org.apache.polaris.core.persistence.pagination.Page} + * which includes an encoded token for fetching the next page. + * + * @see org.apache.polaris.core.persistence.pagination.PageToken + * @see org.apache.polaris.core.persistence.pagination.Page + * @see ReportIdToken */ @PolarisImmutable public interface MetricsQueryCriteria { @@ -74,38 +96,7 @@ public interface MetricsQueryCriteria { */ Optional reportTraceId(); - // === Pagination === - - /** - * Maximum number of results to return. - * - *

Defaults to 100. Used together with {@link #offset()} for offset-based pagination. - */ - @Value.Default - default int limit() { - return 100; - } - - /** - * Number of results to skip before returning results. - * - *

Defaults to 0. Used for offset-based pagination where: - * - *

    - *
  • Page 1: offset=0, limit=100 → returns results 1-100 - *
  • Page 2: offset=100, limit=100 → returns results 101-200 - *
  • Page N: offset=(N-1)*limit, limit=100 → returns results for page N - *
- * - *

Note: Offset-based pagination can be inefficient for large offsets in some databases. For - * very large result sets (>10K records), consider using time-based filtering with {@link - * #startTime()} and {@link #endTime()} to narrow the result set instead of relying on large - * offsets. - */ - @Value.Default - default int offset() { - return 0; - } + // === Factory Methods === /** * Creates a new builder for MetricsQueryCriteria. @@ -119,39 +110,44 @@ static ImmutableMetricsQueryCriteria.Builder builder() { /** * Creates criteria for querying by table and time range. * + *

Pagination is handled separately via the {@code PageToken} parameter to query methods. + * * @param catalogName the catalog name * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTime the start time (inclusive) * @param endTime the end time (exclusive) - * @param limit maximum number of results * @return the query criteria */ static MetricsQueryCriteria forTable( - String catalogName, - String namespace, - String tableName, - Instant startTime, - Instant endTime, - int limit) { + String catalogName, String namespace, String tableName, Instant startTime, Instant endTime) { return builder() .catalogName(catalogName) .namespace(namespace) .tableName(tableName) .startTime(startTime) .endTime(endTime) - .limit(limit) .build(); } /** * Creates criteria for querying by client-provided trace ID. * + *

Pagination is handled separately via the {@code PageToken} parameter to query methods. + * * @param reportTraceId the client trace ID to search for - * @param limit maximum number of results * @return the query criteria */ - static MetricsQueryCriteria forReportTraceId(String reportTraceId, int limit) { - return builder().reportTraceId(reportTraceId).limit(limit).build(); + static MetricsQueryCriteria forReportTraceId(String reportTraceId) { + return builder().reportTraceId(reportTraceId).build(); + } + + /** + * Creates empty criteria (no filters). Useful for pagination-only queries. + * + * @return empty query criteria + */ + static MetricsQueryCriteria empty() { + return builder().build(); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java index cd41e82573..56bd435ccf 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -20,15 +20,15 @@ import jakarta.annotation.Nonnull; import java.util.Collections; -import java.util.List; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; /** * A no-op implementation of {@link MetricsPersistence} for backends that don't support metrics * persistence. * *

This implementation is used as the default when a persistence backend does not support metrics - * storage. All write operations are silently ignored, and all query operations return empty - * results. + * storage. All write operations are silently ignored, and all query operations return empty pages. */ final class NoOpMetricsPersistence implements MetricsPersistence { @@ -51,13 +51,15 @@ public void writeCommitReport(@Nonnull CommitMetricsRecord record) { @Nonnull @Override - public List queryScanReports(@Nonnull MetricsQueryCriteria criteria) { - return Collections.emptyList(); + public Page queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return Page.fromItems(Collections.emptyList()); } @Nonnull @Override - public List queryCommitReports(@Nonnull MetricsQueryCriteria criteria) { - return Collections.emptyList(); + public Page queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return Page.fromItems(Collections.emptyList()); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java new file mode 100644 index 0000000000..fb15480a0c --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nullable; +import org.apache.polaris.core.persistence.pagination.Token; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Pagination {@linkplain Token token} for metrics queries, backed by the report ID (UUID). + * + *

This token enables cursor-based pagination for metrics queries across different storage + * backends. The report ID is used as the cursor because it is: + * + *

    + *
  • Guaranteed unique across all reports + *
  • Present in both scan and commit metrics records + *
  • Stable (doesn't change over time) + *
+ * + *

Each backend implementation can use this cursor value to implement efficient pagination in + * whatever way is optimal for that storage system: + * + *

    + *
  • RDBMS: {@code WHERE report_id > :lastReportId ORDER BY report_id} + *
  • NoSQL: Use report ID as partition/sort key cursor + *
  • Time-series: Combine with timestamp for efficient range scans + *
+ */ +@PolarisImmutable +@JsonSerialize(as = ImmutableReportIdToken.class) +@JsonDeserialize(as = ImmutableReportIdToken.class) +public interface ReportIdToken extends Token { + + /** Token type identifier. Short to minimize serialized token size. */ + String ID = "r"; + + /** + * The report ID to use as the cursor. + * + *

Results should start after this report ID. This is typically the {@code reportId} of the + * last item from the previous page. + */ + @JsonProperty("r") + String reportId(); + + @Override + default String getT() { + return ID; + } + + /** + * Creates a token from a report ID. + * + * @param reportId the report ID to use as cursor + * @return the token, or null if reportId is null + */ + static @Nullable ReportIdToken fromReportId(@Nullable String reportId) { + if (reportId == null) { + return null; + } + return ImmutableReportIdToken.builder().reportId(reportId).build(); + } + + /** + * Creates a token from a metrics record. + * + * @param record the record whose report ID should be used as cursor + * @return the token, or null if record is null + */ + static @Nullable ReportIdToken fromRecord(@Nullable ScanMetricsRecord record) { + if (record == null) { + return null; + } + return fromReportId(record.reportId()); + } + + /** + * Creates a token from a commit metrics record. + * + * @param record the record whose report ID should be used as cursor + * @return the token, or null if record is null + */ + static @Nullable ReportIdToken fromRecord(@Nullable CommitMetricsRecord record) { + if (record == null) { + return null; + } + return fromReportId(record.reportId()); + } + + /** Token type registration for service loader. 
*/ + final class ReportIdTokenType implements TokenType { + @Override + public String id() { + return ID; + } + + @Override + public Class javaType() { + return ReportIdToken.class; + } + } +} diff --git a/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType b/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType index 3579dd29b3..d496ebeddf 100644 --- a/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType +++ b/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType @@ -18,3 +18,4 @@ # org.apache.polaris.core.persistence.pagination.EntityIdToken$EntityIdTokenType +org.apache.polaris.core.persistence.metrics.ReportIdToken$ReportIdTokenType From 59c907f270369fde124ac8b84899e35065066bf9 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Tue, 3 Feb 2026 18:28:22 -0800 Subject: [PATCH 04/67] Review comments --- .../iceberg/MetricsRecordConverter.java | 256 ++++++++++++++++++ .../metrics/CommitMetricsRecord.java | 56 +--- .../persistence/metrics/MetricsContext.java | 56 ---- .../metrics/MetricsPersistence.java | 57 ++-- .../metrics/MetricsPersistenceFactory.java | 41 --- .../metrics/MetricsQueryCriteria.java | 33 +-- .../metrics/MetricsRecordConverter.java | 180 ------------ .../metrics/MetricsRecordIdentity.java | 93 +++++++ .../metrics/NoOpMetricsPersistence.java | 5 - .../persistence/metrics/ReportIdToken.java | 9 + .../metrics/ScanMetricsRecord.java | 56 +--- 11 files changed, 416 insertions(+), 426 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java delete mode 100644 
polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java new file mode 100644 index 0000000000..e9d13f36d0 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.metrics.iceberg; + +import java.time.Instant; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.CounterResult; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.metrics.TimerResult; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; + +/** + * Converts Iceberg metrics reports to SPI record types using a fluent builder API. + * + *

This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link + * CommitReport} and combines them with context information to create persistence-ready records. + * + *

Example usage: + * + *

{@code
+ * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
+ *     .catalogId(catalog.getId())
+ *     .catalogName(catalog.getName())
+ *     .namespace(namespace.toString())
+ *     .tableName(tableName)
+ *     .build();
+ * }
+ */ +public final class MetricsRecordConverter { + + private MetricsRecordConverter() { + // Utility class + } + + /** + * Creates a builder for converting a ScanReport to a ScanMetricsRecord. + * + * @param scanReport the Iceberg scan report + * @return builder for configuring the conversion + */ + public static ScanReportBuilder forScanReport(ScanReport scanReport) { + return new ScanReportBuilder(scanReport); + } + + /** + * Creates a builder for converting a CommitReport to a CommitMetricsRecord. + * + * @param commitReport the Iceberg commit report + * @return builder for configuring the conversion + */ + public static CommitReportBuilder forCommitReport(CommitReport commitReport) { + return new CommitReportBuilder(commitReport); + } + + /** Builder for converting ScanReport to ScanMetricsRecord. */ + public static final class ScanReportBuilder { + private final ScanReport scanReport; + private long catalogId; + private String catalogName; + private String namespace; + private String tableName; + + private ScanReportBuilder(ScanReport scanReport) { + this.scanReport = scanReport; + } + + public ScanReportBuilder catalogId(long catalogId) { + this.catalogId = catalogId; + return this; + } + + public ScanReportBuilder catalogName(String catalogName) { + this.catalogName = catalogName; + return this; + } + + public ScanReportBuilder namespace(String namespace) { + this.namespace = namespace; + return this; + } + + public ScanReportBuilder tableName(String tableName) { + this.tableName = tableName; + return this; + } + + public ScanMetricsRecord build() { + ScanMetricsResult metrics = scanReport.scanMetrics(); + Map reportMetadata = + scanReport.metadata() != null ? 
scanReport.metadata() : Collections.emptyMap(); + + return ScanMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .timestamp(Instant.now()) + .snapshotId(Optional.of(scanReport.snapshotId())) + .schemaId(Optional.of(scanReport.schemaId())) + .filterExpression( + scanReport.filter() != null + ? Optional.of(scanReport.filter().toString()) + : Optional.empty()) + .projectedFieldIds( + scanReport.projectedFieldIds() != null + ? scanReport.projectedFieldIds() + : Collections.emptyList()) + .projectedFieldNames( + scanReport.projectedFieldNames() != null + ? scanReport.projectedFieldNames() + : Collections.emptyList()) + .resultDataFiles(getCounterValue(metrics.resultDataFiles())) + .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) + .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) + .totalDataManifests(getCounterValue(metrics.totalDataManifests())) + .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) + .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) + .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) + .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) + .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) + .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) + .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) + .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) + .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) + .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) + .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) + .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) + .metadata(reportMetadata) + .build(); + } + } + + /** Builder for converting CommitReport to 
CommitMetricsRecord. */ + public static final class CommitReportBuilder { + private final CommitReport commitReport; + private long catalogId; + private String catalogName; + private String namespace; + private String tableName; + + private CommitReportBuilder(CommitReport commitReport) { + this.commitReport = commitReport; + } + + public CommitReportBuilder catalogId(long catalogId) { + this.catalogId = catalogId; + return this; + } + + public CommitReportBuilder catalogName(String catalogName) { + this.catalogName = catalogName; + return this; + } + + public CommitReportBuilder namespace(String namespace) { + this.namespace = namespace; + return this; + } + + public CommitReportBuilder tableName(String tableName) { + this.tableName = tableName; + return this; + } + + public CommitMetricsRecord build() { + CommitMetricsResult metrics = commitReport.commitMetrics(); + Map reportMetadata = + commitReport.metadata() != null ? commitReport.metadata() : Collections.emptyMap(); + + return CommitMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .timestamp(Instant.now()) + .snapshotId(commitReport.snapshotId()) + .sequenceNumber(Optional.of(commitReport.sequenceNumber())) + .operation(commitReport.operation()) + .addedDataFiles(getCounterValue(metrics.addedDataFiles())) + .removedDataFiles(getCounterValue(metrics.removedDataFiles())) + .totalDataFiles(getCounterValue(metrics.totalDataFiles())) + .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) + .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) + .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) + .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) + .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) + .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) + 
.removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) + .addedRecords(getCounterValue(metrics.addedRecords())) + .removedRecords(getCounterValue(metrics.removedRecords())) + .totalRecords(getCounterValue(metrics.totalRecords())) + .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) + .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) + .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) + .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) + .attempts(getCounterValueInt(metrics.attempts())) + .metadata(reportMetadata) + .build(); + } + } + + // === Helper Methods === + + private static long getCounterValue(CounterResult counter) { + if (counter == null) { + return 0L; + } + return counter.value(); + } + + private static int getCounterValueInt(CounterResult counter) { + if (counter == null) { + return 0; + } + return (int) counter.value(); + } + + private static long getTimerValueMs(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return 0L; + } + return timer.totalDuration().toMillis(); + } + + private static Optional getTimerValueMsOpt(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return Optional.empty(); + } + return Optional.of(timer.totalDuration().toMillis()); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 9d41db8173..2986beb0fb 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -18,8 +18,6 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.time.Instant; -import java.util.Map; import java.util.Optional; import 
org.apache.polaris.immutables.PolarisImmutable; @@ -27,50 +25,15 @@ * Backend-agnostic representation of an Iceberg commit metrics report. * *

This record captures all relevant metrics from an Iceberg {@code CommitReport} along with - * contextual information such as realm, catalog, and request correlation data. + * contextual information such as catalog identification and table location. + * + *

Common identification fields are inherited from {@link MetricsRecordIdentity}. + * + *

Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained + * from the CDI-injected {@code RealmContext} at persistence time. */ @PolarisImmutable -public interface CommitMetricsRecord { - - // === Identification === - - /** Unique identifier for this report (UUID). */ - String reportId(); - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** Table name. */ - String tableName(); - - // === Timing === - - /** Timestamp when the report was received. */ - Instant timestamp(); - - // === Client Correlation === - - /** - * Client-provided trace ID from the metrics report metadata. - * - *

This is an optional identifier that the Iceberg client may include in the report's metadata - * map (typically under the key "trace-id"). It allows clients to correlate this metrics report - * with their own distributed tracing system or query execution context. - * - *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal - * information are not included in this record. The persistence implementation can obtain these - * from the ambient request context (OTel context, security context) at write time if needed. - */ - Optional reportTraceId(); +public interface CommitMetricsRecord extends MetricsRecordIdentity { // === Commit Context === @@ -147,11 +110,6 @@ public interface CommitMetricsRecord { /** Number of commit attempts. */ int attempts(); - // === Extensibility === - - /** Additional metadata as key-value pairs. */ - Map metadata(); - /** * Creates a new builder for CommitMetricsRecord. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java deleted file mode 100644 index 38432c12c2..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.core.persistence.metrics; - -import org.apache.polaris.immutables.PolarisImmutable; - -/** - * Context information needed when converting Iceberg metrics reports to persistence records. - * - *

This context captures information from the request environment that is not available in the - * Iceberg report itself, such as realm and catalog identification. - * - *

Note: Principal and tracing information (e.g., OpenTelemetry trace/span IDs) are not included - * in this context. The persistence implementation can obtain these from the ambient request context - * (OTel context, security context) at write time if needed. - */ -@PolarisImmutable -public interface MetricsContext { - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** - * Creates a new builder for MetricsContext. - * - * @return a new builder instance - */ - static ImmutableMetricsContext.Builder builder() { - return ImmutableMetricsContext.builder(); - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index 346cc77d01..ae7e6f7ec0 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -30,7 +30,33 @@ * remain backend-agnostic. * *

Implementations should be idempotent - writing the same reportId twice should have no effect. - * Implementations that don't support metrics persistence should return {@link #NOOP}. + * Implementations that don't support metrics persistence can use {@link #NOOP} which silently + * ignores write operations and returns empty pages for queries. + * + *

Dependency Injection

+ * + *

This interface is designed to be injected via CDI (Contexts and Dependency Injection). The + * deployment module (e.g., {@code polaris-quarkus-service}) should provide a {@code @Produces} + * method that creates the appropriate implementation based on the configured persistence backend. + * + *

Example producer: + * + *

{@code
+ * @Produces
+ * @RequestScoped
+ * MetricsPersistence metricsPersistence(RealmContext realmContext, PersistenceBackend backend) {
+ *   if (backend.supportsMetrics()) {
+ *     return backend.createMetricsPersistence(realmContext);
+ *   }
+ *   return MetricsPersistence.NOOP;
+ * }
+ * }
+ * + *

Multi-Tenancy

+ * + *

Realm context is not passed in the record objects. Implementations should obtain the realm + * from the CDI-injected {@code RealmContext} at write/query time. This keeps catalog-specific code + * from needing to manage realm concerns directly. * *

Pagination

* @@ -43,8 +69,8 @@ *
  • Efficient cursor-based pagination that works with large result sets * * - *

    The {@link ReportIdToken} provides a cursor based on the report ID (UUID), but backends may - * use other cursor strategies internally. + *

    The {@link ReportIdToken} provides a reference cursor implementation based on report ID + * (UUID), but backends may use other cursor strategies internally. * * @see PageToken * @see Page @@ -55,21 +81,6 @@ public interface MetricsPersistence { /** A no-op implementation for backends that don't support metrics persistence. */ MetricsPersistence NOOP = new NoOpMetricsPersistence(); - // ============================================================================ - // Capability Detection - // ============================================================================ - - /** - * Returns whether this persistence backend supports metrics storage. - * - *

    Backends that do not support metrics should return false. Service code should NOT use this - * to branch with instanceof checks - instead, call the interface methods directly and rely on the - * no-op behavior for unsupported backends. - * - * @return true if metrics persistence is supported, false otherwise - */ - boolean isSupported(); - // ============================================================================ // Write Operations // ============================================================================ @@ -77,8 +88,7 @@ public interface MetricsPersistence { /** * Persists a scan metrics record. * - *

    This operation is idempotent - writing the same reportId twice has no effect. If {@link - * #isSupported()} returns false, this is a no-op. + *

    This operation is idempotent - writing the same reportId twice has no effect. * * @param record the scan metrics record to persist */ @@ -87,8 +97,7 @@ public interface MetricsPersistence { /** * Persists a commit metrics record. * - *

    This operation is idempotent - writing the same reportId twice has no effect. If {@link - * #isSupported()} returns false, this is a no-op. + *

    This operation is idempotent - writing the same reportId twice has no effect. * * @param record the commit metrics record to persist */ @@ -101,8 +110,6 @@ public interface MetricsPersistence { /** * Queries scan metrics reports based on the specified criteria. * - *

    Returns an empty page if {@link #isSupported()} returns false. - * *

    Example usage: * *

    {@code
    @@ -129,8 +136,6 @@ Page queryScanReports(
       /**
        * Queries commit metrics reports based on the specified criteria.
        *
    -   * 

    Returns an empty page if {@link #isSupported()} returns false. - * * @param criteria the query criteria (filters) * @param pageToken pagination parameters (page size and optional cursor) * @return page of matching commit metrics records with continuation token if more results exist diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java deleted file mode 100644 index 90a56e1ec3..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.persistence.metrics; - -import org.apache.polaris.core.context.RealmContext; - -/** - * Factory interface for creating {@link MetricsPersistence} instances. - * - *

    Implementations may cache instances per realm for efficiency. For backends that don't support - * metrics persistence, implementations should return {@link MetricsPersistence#NOOP}. - */ -public interface MetricsPersistenceFactory { - - /** - * Gets or creates a {@link MetricsPersistence} instance for the given realm. - * - *

    Implementations may cache instances per realm. If the persistence backend does not support - * metrics persistence, this method should return {@link MetricsPersistence#NOOP}. - * - * @param realmContext the realm context - * @return a MetricsPersistence instance for the realm, or NOOP if not supported - */ - MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext); -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index d1989c68d2..98d00d742c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -40,10 +40,13 @@ * * * - * * *
    PatternFields UsedIndex Required
    By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
    By Client Trace IDreportTraceIdNo (custom deployment)
    By Time OnlystartTime, endTimePartial (timestamp index)
    * + *

    Additional query patterns (e.g., by trace ID) can be implemented by persistence backends using + * the {@link #metadata()} filter map. Client-provided correlation data should be stored in the + * metrics record's metadata map and can be filtered using the metadata criteria. + * *

    Pagination

    * *

    Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} @@ -83,18 +86,20 @@ public interface MetricsQueryCriteria { /** End time for the query (exclusive). */ Optional endTime(); - // === Correlation === + // === Metadata Filtering === /** - * Client-provided trace ID to filter by (from report metadata). + * Metadata key-value pairs to filter by. * - *

    This matches the {@code reportTraceId} field in the metrics records, which originates from - * the client's metadata map. Useful for correlating metrics with client-side query execution. + *

    This enables filtering metrics by client-provided correlation data stored in the record's + * metadata map. For example, clients may include a trace ID in the metadata that can be queried + * later. * - *

    Note: This query pattern may require a custom index in deployment environments. The OSS - * codebase does not include an index for trace-based queries. + *

    Note: Metadata filtering may require custom indexes depending on the persistence backend. + * The OSS codebase provides basic support, but performance optimizations may be needed for + * high-volume deployments. */ - Optional reportTraceId(); + java.util.Map metadata(); // === Factory Methods === @@ -130,18 +135,6 @@ static MetricsQueryCriteria forTable( .build(); } - /** - * Creates criteria for querying by client-provided trace ID. - * - *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. - * - * @param reportTraceId the client trace ID to search for - * @return the query criteria - */ - static MetricsQueryCriteria forReportTraceId(String reportTraceId) { - return builder().reportTraceId(reportTraceId).build(); - } - /** * Creates empty criteria (no filters). Useful for pagination-only queries. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java deleted file mode 100644 index 0cb740b8cf..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.core.persistence.metrics; - -import java.time.Instant; -import java.util.Collections; -import java.util.Optional; -import java.util.UUID; -import org.apache.iceberg.metrics.CommitMetricsResult; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.CounterResult; -import org.apache.iceberg.metrics.ScanMetricsResult; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.iceberg.metrics.TimerResult; - -/** - * Utility class for converting Iceberg metrics reports to SPI record types. - * - *

    This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link - * CommitReport} and combines them with context information to create persistence-ready records. - */ -public final class MetricsRecordConverter { - - private MetricsRecordConverter() { - // Utility class - } - - /** - * Converts an Iceberg ScanReport to a ScanMetricsRecord. - * - * @param scanReport the Iceberg scan report - * @param tableName the table name - * @param context the metrics context containing realm, catalog, and request information - * @return the scan metrics record ready for persistence - */ - public static ScanMetricsRecord fromScanReport( - ScanReport scanReport, String tableName, MetricsContext context) { - ScanMetricsResult metrics = scanReport.scanMetrics(); - - return ScanMetricsRecord.builder() - .reportId(UUID.randomUUID().toString()) - .realmId(context.realmId()) - .catalogId(context.catalogId()) - .catalogName(context.catalogName()) - .namespace(context.namespace()) - .tableName(tableName) - .timestamp(Instant.now()) - .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) - .snapshotId(Optional.of(scanReport.snapshotId())) - .schemaId(Optional.of(scanReport.schemaId())) - .filterExpression( - scanReport.filter() != null - ? Optional.of(scanReport.filter().toString()) - : Optional.empty()) - .projectedFieldIds( - scanReport.projectedFieldIds() != null - ? scanReport.projectedFieldIds() - : Collections.emptyList()) - .projectedFieldNames( - scanReport.projectedFieldNames() != null - ? 
scanReport.projectedFieldNames() - : Collections.emptyList()) - .resultDataFiles(getCounterValue(metrics.resultDataFiles())) - .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) - .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) - .totalDataManifests(getCounterValue(metrics.totalDataManifests())) - .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) - .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) - .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) - .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) - .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) - .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) - .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) - .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) - .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) - .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) - .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) - .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) - .metadata(Collections.emptyMap()) - .build(); - } - - /** - * Converts an Iceberg CommitReport to a CommitMetricsRecord. 
- * - * @param commitReport the Iceberg commit report - * @param tableName the table name - * @param context the metrics context containing realm, catalog, and request information - * @return the commit metrics record ready for persistence - */ - public static CommitMetricsRecord fromCommitReport( - CommitReport commitReport, String tableName, MetricsContext context) { - CommitMetricsResult metrics = commitReport.commitMetrics(); - - return CommitMetricsRecord.builder() - .reportId(UUID.randomUUID().toString()) - .realmId(context.realmId()) - .catalogId(context.catalogId()) - .catalogName(context.catalogName()) - .namespace(context.namespace()) - .tableName(tableName) - .timestamp(Instant.now()) - .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) - .snapshotId(commitReport.snapshotId()) - .sequenceNumber(Optional.of(commitReport.sequenceNumber())) - .operation(commitReport.operation()) - .addedDataFiles(getCounterValue(metrics.addedDataFiles())) - .removedDataFiles(getCounterValue(metrics.removedDataFiles())) - .totalDataFiles(getCounterValue(metrics.totalDataFiles())) - .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) - .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) - .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) - .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) - .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) - .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) - .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) - .addedRecords(getCounterValue(metrics.addedRecords())) - .removedRecords(getCounterValue(metrics.removedRecords())) - .totalRecords(getCounterValue(metrics.totalRecords())) - .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) - .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) - 
.totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) - .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) - .attempts(getCounterValueInt(metrics.attempts())) - .metadata(Collections.emptyMap()) - .build(); - } - - private static long getCounterValue(CounterResult counter) { - if (counter == null) { - return 0L; - } - return counter.value(); - } - - private static int getCounterValueInt(CounterResult counter) { - if (counter == null) { - return 0; - } - return (int) counter.value(); - } - - private static long getTimerValueMs(TimerResult timer) { - if (timer == null || timer.totalDuration() == null) { - return 0L; - } - return timer.totalDuration().toMillis(); - } - - private static Optional getTimerValueMsOpt(TimerResult timer) { - if (timer == null || timer.totalDuration() == null) { - return Optional.empty(); - } - return Optional.of(timer.totalDuration().toMillis()); - } - - private static Optional getMetadataValue( - java.util.Map metadata, String key) { - if (metadata == null) { - return Optional.empty(); - } - return Optional.ofNullable(metadata.get(key)); - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java new file mode 100644 index 0000000000..db8ac1325d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Map; + +/** + * Base interface containing common identification fields shared by all metrics records. + * + *

    This interface defines the common fields that identify the source of a metrics report, + * including the report ID, catalog information, namespace, table name, timestamp, and metadata. + * + *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to + * inherit these common fields while adding their own specific metrics. + * + *

    Note: Realm ID is intentionally not included in this interface. Multi-tenancy realm context + * should be obtained from the CDI-injected {@code RealmContext} at persistence time. This keeps + * catalog-specific code from needing to manage realm concerns. + */ +public interface MetricsRecordIdentity { + + /** + * Unique identifier for this report (UUID). + * + *

    This ID is generated when the record is created and serves as the primary key for the + * metrics record in persistence storage. + */ + String reportId(); + + /** + * Internal catalog ID. + * + *

    This matches the catalog entity ID in Polaris persistence, as defined by {@code + * PolarisEntityCore#getId()}. + */ + long catalogId(); + + /** + * Human-readable catalog name. + * + *

    The catalog name as known to clients. This is stored alongside the ID for query convenience + * and display purposes. + */ + String catalogName(); + + /** + * Dot-separated namespace path (e.g., "db.schema"). + * + *

    The namespace containing the table for which metrics are reported. + */ + String namespace(); + + /** + * Table name. + * + *

    The name of the table for which metrics are reported. + */ + String tableName(); + + /** + * Timestamp when the report was received. + * + *

    This is the server-side timestamp when the metrics report was processed, not the client-side + * timestamp when the operation occurred. + */ + Instant timestamp(); + + /** + * Additional metadata as key-value pairs. + * + *

    This map can contain additional contextual information from the original Iceberg report, + * including client-provided trace IDs or other correlation data. Persistence implementations can + * store and index specific metadata fields as needed. + */ + Map metadata(); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java index 56bd435ccf..b33c095dc8 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -34,11 +34,6 @@ final class NoOpMetricsPersistence implements MetricsPersistence { NoOpMetricsPersistence() {} - @Override - public boolean isSupported() { - return false; - } - @Override public void writeScanReport(@Nonnull ScanMetricsRecord record) { // No-op diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java index fb15480a0c..c4e4ec6320 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -28,6 +28,15 @@ /** * Pagination {@linkplain Token token} for metrics queries, backed by the report ID (UUID). * + *

    Note: This is a reference implementation provided for convenience. It is + * not required by the {@link MetricsPersistence} SPI contract. Persistence backends are + * free to implement their own {@link Token} subclass optimized for their storage model (e.g., + * timestamp-based cursors, composite keys, continuation tokens). + * + *

    Only {@link org.apache.polaris.core.persistence.pagination.PageToken} (for requests) and + * {@link org.apache.polaris.core.persistence.pagination.Page} (for responses) are required by the + * SPI contract. + * *

    This token enables cursor-based pagination for metrics queries across different storage * backends. The report ID is used as the cursor because it is: * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index 5e85566c14..b9fd79ec29 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -18,9 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.time.Instant; import java.util.List; -import java.util.Map; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -28,50 +26,15 @@ * Backend-agnostic representation of an Iceberg scan metrics report. * *

    This record captures all relevant metrics from an Iceberg {@code ScanReport} along with - * contextual information such as realm, catalog, and request correlation data. + * contextual information such as catalog identification and table location. + * + *

    Common identification fields are inherited from {@link MetricsRecordIdentity}. + * + *

    Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained + * from the CDI-injected {@code RealmContext} at persistence time. */ @PolarisImmutable -public interface ScanMetricsRecord { - - // === Identification === - - /** Unique identifier for this report (UUID). */ - String reportId(); - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** Table name. */ - String tableName(); - - // === Timing === - - /** Timestamp when the report was received. */ - Instant timestamp(); - - // === Client Correlation === - - /** - * Client-provided trace ID from the metrics report metadata. - * - *

    This is an optional identifier that the Iceberg client may include in the report's metadata - * map (typically under the key "trace-id"). It allows clients to correlate this metrics report - * with their own distributed tracing system or query execution context. - * - *

    Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal - * information are not included in this record. The persistence implementation can obtain these - * from the ambient request context (OTel context, security context) at write time if needed. - */ - Optional reportTraceId(); +public interface ScanMetricsRecord extends MetricsRecordIdentity { // === Scan Context === @@ -146,11 +109,6 @@ public interface ScanMetricsRecord { /** Total size of delete files in bytes. */ long totalDeleteFileSizeBytes(); - // === Extensibility === - - /** Additional metadata as key-value pairs. */ - Map metadata(); - /** * Creates a new builder for ScanMetricsRecord. * From 9ec22eaf97db88565cd7b393d24009a54498b1dc Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Tue, 3 Feb 2026 18:48:56 -0800 Subject: [PATCH 05/67] Review comments --- .../iceberg/MetricsRecordConverter.java | 46 +++++++++---------- .../metrics/MetricsRecordIdentity.java | 44 ++++++++++++------ 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index e9d13f36d0..1ce754851f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Optional; import java.util.UUID; +import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.metrics.CounterResult; @@ -44,8 +45,7 @@ * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) * .catalogId(catalog.getId()) * .catalogName(catalog.getName()) - * .namespace(namespace.toString()) - * 
.tableName(tableName) + * .tableIdentifier(TableIdentifier.of(namespace, tableName)) * .build(); * }

    */ @@ -80,8 +80,7 @@ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; private String catalogName; - private String namespace; - private String tableName; + private TableIdentifier tableIdentifier; private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -97,13 +96,14 @@ public ScanReportBuilder catalogName(String catalogName) { return this; } - public ScanReportBuilder namespace(String namespace) { - this.namespace = namespace; - return this; - } - - public ScanReportBuilder tableName(String tableName) { - this.tableName = tableName; + /** + * Sets the table identifier including namespace and table name. + * + * @param tableIdentifier the Iceberg table identifier + * @return this builder + */ + public ScanReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { + this.tableIdentifier = tableIdentifier; return this; } @@ -116,8 +116,7 @@ public ScanMetricsRecord build() { .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) .catalogName(catalogName) - .namespace(namespace) - .tableName(tableName) + .tableIdentifier(tableIdentifier) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -159,8 +158,7 @@ public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; private String catalogName; - private String namespace; - private String tableName; + private TableIdentifier tableIdentifier; private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -176,13 +174,14 @@ public CommitReportBuilder catalogName(String catalogName) { return this; } - public CommitReportBuilder namespace(String namespace) { - this.namespace = namespace; - return this; - } - - public CommitReportBuilder tableName(String tableName) { - this.tableName = tableName; + /** + * Sets the table identifier including namespace and table name. 
+ * + * @param tableIdentifier the Iceberg table identifier + * @return this builder + */ + public CommitReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { + this.tableIdentifier = tableIdentifier; return this; } @@ -195,8 +194,7 @@ public CommitMetricsRecord build() { .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) .catalogName(catalogName) - .namespace(namespace) - .tableName(tableName) + .tableIdentifier(tableIdentifier) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index db8ac1325d..498a201ab6 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -20,19 +20,39 @@ import java.time.Instant; import java.util.Map; +import org.apache.iceberg.catalog.TableIdentifier; /** * Base interface containing common identification fields shared by all metrics records. * *

    This interface defines the common fields that identify the source of a metrics report, - * including the report ID, catalog information, namespace, table name, timestamp, and metadata. + * including the report ID, catalog information, table identifier, timestamp, and metadata. * *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to * inherit these common fields while adding their own specific metrics. * - *

    Note: Realm ID is intentionally not included in this interface. Multi-tenancy realm context - * should be obtained from the CDI-injected {@code RealmContext} at persistence time. This keeps - * catalog-specific code from needing to manage realm concerns. + *

    Design Decisions

    + * + *

    TableIdentifier vs separate namespace/tableName: We use Iceberg's {@link + * TableIdentifier} which encapsulates both namespace and table name. This aligns with how Iceberg + * reports identify tables and is consistent with Polaris entity patterns (e.g., {@code + * TableLikeEntity.getTableIdentifier()}). + * + *

    Catalog ID/Name vs CatalogEntity: We use separate primitive fields for catalog ID and + * name rather than {@code CatalogEntity} because: + * + *

      + *
    • {@code CatalogEntity} is a heavyweight object containing storage config, properties, and + * other data not relevant for metrics identification + *
    • {@code CatalogEntity} is not an Immutables-compatible interface, making it difficult to + * include in {@code @PolarisImmutable} generated classes + *
    • For metrics, we only need the catalog ID (for foreign key relationships) and name (for + * display/query convenience) + *
    + * + *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm + * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This + * keeps catalog-specific code from needing to manage realm concerns. */ public interface MetricsRecordIdentity { @@ -61,18 +81,16 @@ public interface MetricsRecordIdentity { String catalogName(); /** - * Dot-separated namespace path (e.g., "db.schema"). + * Table identifier including namespace and table name. * - *

    The namespace containing the table for which metrics are reported. - */ - String namespace(); - - /** - * Table name. + *

    This uses Iceberg's {@link TableIdentifier} which encapsulates both the namespace path and + * the table name. The namespace can be accessed via {@link TableIdentifier#namespace()} and the + * table name via {@link TableIdentifier#name()}. * - *

    The name of the table for which metrics are reported. + *

    Example: For a table "my_table" in namespace "db.schema", use {@code + * TableIdentifier.of(Namespace.of("db", "schema"), "my_table")}. */ - String tableName(); + TableIdentifier tableIdentifier(); /** * Timestamp when the report was received. From b66a0d6e1737bc810c431ee75484bfe696dad72f Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 14:54:44 -0800 Subject: [PATCH 06/67] refactor: Remove TableIdentifier and catalogName from SPI records Per reviewer feedback: - Replace Iceberg's TableIdentifier with separate namespace/tableName strings in MetricsRecordIdentity to avoid Iceberg dependencies in Polaris SPI - Remove catalogName from records, keep only catalogId since catalog names can change over time (via rename operations) - Update MetricsQueryCriteria to use catalogId (OptionalLong) instead of catalogName - Update MetricsRecordConverter to extract namespace/tableName from TableIdentifier The service layer (MetricsRecordConverter) still accepts TableIdentifier and performs the conversion to primitives for the SPI records. --- .../iceberg/MetricsRecordConverter.java | 37 ++++++++------- .../metrics/MetricsQueryCriteria.java | 17 ++++--- .../metrics/MetricsRecordIdentity.java | 46 +++++++------------ 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 1ce754851f..5c44ed6c38 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -44,7 +44,6 @@ *

    {@code
      * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
      *     .catalogId(catalog.getId())
    - *     .catalogName(catalog.getName())
      *     .tableIdentifier(TableIdentifier.of(namespace, tableName))
      *     .build();
      * }
    @@ -75,11 +74,20 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { return new CommitReportBuilder(commitReport); } + /** + * Converts a TableIdentifier namespace to a dot-separated string. + * + * @param tableIdentifier the Iceberg table identifier + * @return dot-separated namespace string + */ + private static String namespaceToString(TableIdentifier tableIdentifier) { + return String.join(".", tableIdentifier.namespace().levels()); + } + /** Builder for converting ScanReport to ScanMetricsRecord. */ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; - private String catalogName; private TableIdentifier tableIdentifier; private ScanReportBuilder(ScanReport scanReport) { @@ -91,14 +99,12 @@ public ScanReportBuilder catalogId(long catalogId) { return this; } - public ScanReportBuilder catalogName(String catalogName) { - this.catalogName = catalogName; - return this; - } - /** * Sets the table identifier including namespace and table name. * + *

    The namespace and table name will be extracted from the TableIdentifier and stored as + * separate primitive fields in the SPI record. + * * @param tableIdentifier the Iceberg table identifier * @return this builder */ @@ -115,8 +121,8 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .catalogName(catalogName) - .tableIdentifier(tableIdentifier) + .namespace(namespaceToString(tableIdentifier)) + .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -157,7 +163,6 @@ public ScanMetricsRecord build() { public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; - private String catalogName; private TableIdentifier tableIdentifier; private CommitReportBuilder(CommitReport commitReport) { @@ -169,14 +174,12 @@ public CommitReportBuilder catalogId(long catalogId) { return this; } - public CommitReportBuilder catalogName(String catalogName) { - this.catalogName = catalogName; - return this; - } - /** * Sets the table identifier including namespace and table name. * + *

    The namespace and table name will be extracted from the TableIdentifier and stored as + * separate primitive fields in the SPI record. + * * @param tableIdentifier the Iceberg table identifier * @return this builder */ @@ -193,8 +196,8 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .catalogName(catalogName) - .tableIdentifier(tableIdentifier) + .namespace(namespaceToString(tableIdentifier)) + .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 98d00d742c..56afe9f264 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -39,7 +39,7 @@ * * * - * + * * *
    PatternFields UsedIndex Required
    By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
    By Table + TimecatalogId, namespace, tableName, startTime, endTimeYes (OSS)
    By Time OnlystartTime, endTimePartial (timestamp index)
    * @@ -69,8 +69,13 @@ public interface MetricsQueryCriteria { // === Table Identification (optional) === - /** Catalog name to filter by. */ - Optional catalogName(); + /** + * Catalog ID to filter by. + * + *

    This is the internal catalog entity ID. Callers should resolve catalog names to IDs before + * querying, as catalog names can change over time. + */ + java.util.OptionalLong catalogId(); /** Namespace to filter by (dot-separated). */ Optional namespace(); @@ -117,7 +122,7 @@ static ImmutableMetricsQueryCriteria.Builder builder() { * *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. * - * @param catalogName the catalog name + * @param catalogId the catalog entity ID * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTime the start time (inclusive) @@ -125,9 +130,9 @@ static ImmutableMetricsQueryCriteria.Builder builder() { * @return the query criteria */ static MetricsQueryCriteria forTable( - String catalogName, String namespace, String tableName, Instant startTime, Instant endTime) { + long catalogId, String namespace, String tableName, Instant startTime, Instant endTime) { return builder() - .catalogName(catalogName) + .catalogId(catalogId) .namespace(namespace) .tableName(tableName) .startTime(startTime) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 498a201ab6..574ae77eb9 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -20,35 +20,25 @@ import java.time.Instant; import java.util.Map; -import org.apache.iceberg.catalog.TableIdentifier; /** * Base interface containing common identification fields shared by all metrics records. * *

    This interface defines the common fields that identify the source of a metrics report, - * including the report ID, catalog information, table identifier, timestamp, and metadata. + * including the report ID, catalog ID, table location, timestamp, and metadata. * *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to * inherit these common fields while adding their own specific metrics. * *

    Design Decisions

    * - *

    TableIdentifier vs separate namespace/tableName: We use Iceberg's {@link - * TableIdentifier} which encapsulates both namespace and table name. This aligns with how Iceberg - * reports identify tables and is consistent with Polaris entity patterns (e.g., {@code - * TableLikeEntity.getTableIdentifier()}). + *

    Namespace/TableName as primitives: We use separate String fields for namespace and + * table name rather than Iceberg's {@code TableIdentifier} to avoid Iceberg dependencies in the + * Polaris SPI. The service layer can convert to/from {@code TableIdentifier} as needed. * - *

    Catalog ID/Name vs CatalogEntity: We use separate primitive fields for catalog ID and - * name rather than {@code CatalogEntity} because: - * - *

      - *
    • {@code CatalogEntity} is a heavyweight object containing storage config, properties, and - * other data not relevant for metrics identification - *
    • {@code CatalogEntity} is not an Immutables-compatible interface, making it difficult to - * include in {@code @PolarisImmutable} generated classes - *
    • For metrics, we only need the catalog ID (for foreign key relationships) and name (for - * display/query convenience) - *
    + *

    Catalog ID only (no name): We store only the catalog ID, not the catalog name. Catalog + * names can change over time (via rename operations), which would make querying historical metrics + * by name challenging. Queries should resolve catalog names to IDs using the current catalog state. * *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This @@ -68,29 +58,25 @@ public interface MetricsRecordIdentity { * Internal catalog ID. * *

    This matches the catalog entity ID in Polaris persistence, as defined by {@code - * PolarisEntityCore#getId()}. + * PolarisEntityCore#getId()}. The catalog name is not stored since it can change over time; + * queries should resolve names to IDs using the current catalog state. */ long catalogId(); /** - * Human-readable catalog name. + * Namespace path as a dot-separated string (e.g., "db.schema"). * - *

    The catalog name as known to clients. This is stored alongside the ID for query convenience - * and display purposes. + *

    This is the namespace portion of the table identifier. Multi-level namespaces are + * represented with dots separating levels. */ - String catalogName(); + String namespace(); /** - * Table identifier including namespace and table name. - * - *

    This uses Iceberg's {@link TableIdentifier} which encapsulates both the namespace path and - * the table name. The namespace can be accessed via {@link TableIdentifier#namespace()} and the - * table name via {@link TableIdentifier#name()}. + * Table name. * - *

    Example: For a table "my_table" in namespace "db.schema", use {@code - * TableIdentifier.of(Namespace.of("db", "schema"), "my_table")}. + *

    This is the table name portion of the table identifier, without the namespace prefix. */ - TableIdentifier tableIdentifier(); + String tableName(); /** * Timestamp when the report was received. From 7ac814ee969269ff7282861d7d3336c3bb654986 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:02:34 -0800 Subject: [PATCH 07/67] refactor: Use List for namespace instead of dot-separated string Per reviewer feedback, namespace is now represented as a List of individual levels rather than a dot-separated string. This avoids ambiguity when namespace segments contain dots. Changes: - MetricsRecordIdentity: namespace() now returns List - MetricsQueryCriteria: namespace() now returns List - MetricsRecordConverter: namespaceToList() converts Iceberg Namespace to List using Arrays.asList() The persistence implementation handles the serialization format. --- .../iceberg/MetricsRecordConverter.java | 14 ++++++----- .../metrics/MetricsQueryCriteria.java | 24 ++++++++++++++----- .../metrics/MetricsRecordIdentity.java | 10 ++++---- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 5c44ed6c38..71128394ae 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -19,7 +19,9 @@ package org.apache.polaris.core.metrics.iceberg; import java.time.Instant; +import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -75,13 +77,13 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { } /** - * Converts a TableIdentifier namespace to a dot-separated string. 
+ * Converts a TableIdentifier namespace to a list of levels. * * @param tableIdentifier the Iceberg table identifier - * @return dot-separated namespace string + * @return namespace as a list of levels */ - private static String namespaceToString(TableIdentifier tableIdentifier) { - return String.join(".", tableIdentifier.namespace().levels()); + private static List namespaceToList(TableIdentifier tableIdentifier) { + return Arrays.asList(tableIdentifier.namespace().levels()); } /** Builder for converting ScanReport to ScanMetricsRecord. */ @@ -121,7 +123,7 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToString(tableIdentifier)) + .namespace(namespaceToList(tableIdentifier)) .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) @@ -196,7 +198,7 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToString(tableIdentifier)) + .namespace(namespaceToList(tableIdentifier)) .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 56afe9f264..736e0624c1 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -19,7 +19,10 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; +import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import org.apache.polaris.immutables.PolarisImmutable; /** @@ -75,10 +78,15 @@ public 
interface MetricsQueryCriteria { *

    This is the internal catalog entity ID. Callers should resolve catalog names to IDs before * querying, as catalog names can change over time. */ - java.util.OptionalLong catalogId(); + OptionalLong catalogId(); - /** Namespace to filter by (dot-separated). */ - Optional namespace(); + /** + * Namespace to filter by. + * + *

    The namespace is represented as a list of levels to avoid ambiguity when segments contain + * dots. An empty list means no namespace filter is applied. + */ + List namespace(); /** Table name to filter by. */ Optional tableName(); @@ -104,7 +112,7 @@ public interface MetricsQueryCriteria { * The OSS codebase provides basic support, but performance optimizations may be needed for * high-volume deployments. */ - java.util.Map metadata(); + Map metadata(); // === Factory Methods === @@ -123,14 +131,18 @@ static ImmutableMetricsQueryCriteria.Builder builder() { *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. * * @param catalogId the catalog entity ID - * @param namespace the namespace (dot-separated) + * @param namespace the namespace as a list of levels * @param tableName the table name * @param startTime the start time (inclusive) * @param endTime the end time (exclusive) * @return the query criteria */ static MetricsQueryCriteria forTable( - long catalogId, String namespace, String tableName, Instant startTime, Instant endTime) { + long catalogId, + List namespace, + String tableName, + Instant startTime, + Instant endTime) { return builder() .catalogId(catalogId) .namespace(namespace) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 574ae77eb9..afbcd1bf4d 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -19,6 +19,7 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; +import java.util.List; import java.util.Map; /** @@ -64,12 +65,13 @@ public interface MetricsRecordIdentity { long catalogId(); /** - * Namespace path as a dot-separated string (e.g., "db.schema"). + * Namespace path as a list of levels (e.g., ["db", "schema"]). * - *

    This is the namespace portion of the table identifier. Multi-level namespaces are - * represented with dots separating levels. + *

    This is the namespace portion of the table identifier. Using a list avoids ambiguity when + * namespace segments contain dots. The persistence implementation handles the serialization + * format. */ - String namespace(); + List namespace(); /** * Table name. From 4a3bc9fbc0f9d5205a3def5f142a5a59c552e549 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:25:52 -0800 Subject: [PATCH 08/67] refactor: Use tableId instead of tableName in metrics records Per reviewer feedback: - r2766326028: Use table ID (same as catalog ID) since table names can change - r2766343275: Avoid denormalizing table names to prevent correctness issues - r2766321215: Return builder with table info, add time ranges at call site Changes: - MetricsRecordIdentity: tableName() -> tableId() (long) - MetricsQueryCriteria: tableName() -> tableId() (OptionalLong) - MetricsQueryCriteria.forTable(): Returns builder with catalogId/tableId - MetricsRecordConverter: tableIdentifier(TableIdentifier) -> tableId(long) + namespace(List) The caller (PersistingMetricsReporter) now needs to resolve table entity ID before creating records, similar to how catalogId is resolved. 
--- .../iceberg/MetricsRecordConverter.java | 73 +++++++++++-------- .../metrics/MetricsQueryCriteria.java | 47 ++++++------ .../metrics/MetricsRecordIdentity.java | 21 +++--- 3 files changed, 78 insertions(+), 63 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 71128394ae..1a118d3e46 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -19,13 +19,11 @@ package org.apache.polaris.core.metrics.iceberg; import java.time.Instant; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.metrics.CounterResult; @@ -46,7 +44,8 @@ *

    {@code
      * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
      *     .catalogId(catalog.getId())
    - *     .tableIdentifier(TableIdentifier.of(namespace, tableName))
    + *     .tableId(tableEntity.getId())
    + *     .namespace(namespace)
      *     .build();
      * }
    */ @@ -76,21 +75,12 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { return new CommitReportBuilder(commitReport); } - /** - * Converts a TableIdentifier namespace to a list of levels. - * - * @param tableIdentifier the Iceberg table identifier - * @return namespace as a list of levels - */ - private static List namespaceToList(TableIdentifier tableIdentifier) { - return Arrays.asList(tableIdentifier.namespace().levels()); - } - /** Builder for converting ScanReport to ScanMetricsRecord. */ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; - private TableIdentifier tableIdentifier; + private long tableId; + private List namespace = Collections.emptyList(); private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -102,16 +92,26 @@ public ScanReportBuilder catalogId(long catalogId) { } /** - * Sets the table identifier including namespace and table name. + * Sets the table entity ID. * - *

    The namespace and table name will be extracted from the TableIdentifier and stored as - * separate primitive fields in the SPI record. + *

    This is the internal Polaris entity ID for the table. * - * @param tableIdentifier the Iceberg table identifier + * @param tableId the table entity ID * @return this builder */ - public ScanReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { - this.tableIdentifier = tableIdentifier; + public ScanReportBuilder tableId(long tableId) { + this.tableId = tableId; + return this; + } + + /** + * Sets the namespace as a list of levels. + * + * @param namespace the namespace levels + * @return this builder + */ + public ScanReportBuilder namespace(List namespace) { + this.namespace = namespace != null ? namespace : Collections.emptyList(); return this; } @@ -123,8 +123,8 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToList(tableIdentifier)) - .tableName(tableIdentifier.name()) + .namespace(namespace) + .tableId(tableId) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -165,7 +165,8 @@ public ScanMetricsRecord build() { public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; - private TableIdentifier tableIdentifier; + private long tableId; + private List namespace = Collections.emptyList(); private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -177,16 +178,26 @@ public CommitReportBuilder catalogId(long catalogId) { } /** - * Sets the table identifier including namespace and table name. + * Sets the table entity ID. + * + *

    This is the internal Polaris entity ID for the table. * - *

    The namespace and table name will be extracted from the TableIdentifier and stored as - * separate primitive fields in the SPI record. + * @param tableId the table entity ID + * @return this builder + */ + public CommitReportBuilder tableId(long tableId) { + this.tableId = tableId; + return this; + } + + /** + * Sets the namespace as a list of levels. * - * @param tableIdentifier the Iceberg table identifier + * @param namespace the namespace levels * @return this builder */ - public CommitReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { - this.tableIdentifier = tableIdentifier; + public CommitReportBuilder namespace(List namespace) { + this.namespace = namespace != null ? namespace : Collections.emptyList(); return this; } @@ -198,8 +209,8 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToList(tableIdentifier)) - .tableName(tableIdentifier.name()) + .namespace(namespace) + .tableId(tableId) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 736e0624c1..bf41995bfd 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -42,7 +42,7 @@ * * * - * + * * *
Pattern</th><th>Fields Used</th><th>Index Required
By Table + Time</td><td>catalogId, namespace, tableName, startTime, endTime</td><td>Yes (OSS)
By Table + Time</td><td>catalogId, tableId, startTime, endTime</td><td>Yes (OSS)
By Time Only</td><td>startTime, endTime</td><td>Partial (timestamp index)
    * @@ -88,8 +88,13 @@ public interface MetricsQueryCriteria { */ List namespace(); - /** Table name to filter by. */ - Optional tableName(); + /** + * Table entity ID to filter by. + * + *

    This is the internal table entity ID. Callers should resolve table names to IDs before + * querying, as table names can change over time. + */ + OptionalLong tableId(); // === Time Range === @@ -126,30 +131,26 @@ static ImmutableMetricsQueryCriteria.Builder builder() { } /** - * Creates criteria for querying by table and time range. + * Creates a builder pre-populated with table identification info. + * + *

    This allows the caller to add time ranges and other filters at the call site. This pattern + * is useful when table info is resolved in one place and time ranges are added elsewhere. + * + *

    Example usage: * - *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. + *

    {@code
    +   * MetricsQueryCriteria criteria = MetricsQueryCriteria.forTable(catalogId, tableId)
    +   *     .startTime(startTime)
    +   *     .endTime(endTime)
    +   *     .build();
    +   * }
    * * @param catalogId the catalog entity ID - * @param namespace the namespace as a list of levels - * @param tableName the table name - * @param startTime the start time (inclusive) - * @param endTime the end time (exclusive) - * @return the query criteria + * @param tableId the table entity ID + * @return a builder pre-populated with table info, ready for adding time ranges */ - static MetricsQueryCriteria forTable( - long catalogId, - List namespace, - String tableName, - Instant startTime, - Instant endTime) { - return builder() - .catalogId(catalogId) - .namespace(namespace) - .tableName(tableName) - .startTime(startTime) - .endTime(endTime) - .build(); + static ImmutableMetricsQueryCriteria.Builder forTable(long catalogId, long tableId) { + return builder().catalogId(catalogId).tableId(tableId); } /** diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index afbcd1bf4d..7d31302e54 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -33,13 +33,14 @@ * *

    Design Decisions

    * - *

    Namespace/TableName as primitives: We use separate String fields for namespace and - * table name rather than Iceberg's {@code TableIdentifier} to avoid Iceberg dependencies in the - * Polaris SPI. The service layer can convert to/from {@code TableIdentifier} as needed. + *

    Entity IDs only (no names): We store only catalog ID and table ID, not their names. + * Names can change over time (via rename operations), which would make querying historical metrics + * by name challenging and lead to correctness issues. Queries should resolve names to IDs using the + * current catalog state. * - *

    Catalog ID only (no name): We store only the catalog ID, not the catalog name. Catalog - * names can change over time (via rename operations), which would make querying historical metrics - * by name challenging. Queries should resolve catalog names to IDs using the current catalog state. + *

    Namespace as List<String>: Namespaces are stored as a list of levels rather than + * a dot-separated string to avoid ambiguity when namespace segments contain dots. The persistence + * implementation handles the serialization format. * *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This @@ -74,11 +75,13 @@ public interface MetricsRecordIdentity { List namespace(); /** - * Table name. + * Internal table entity ID. * - *

    This is the table name portion of the table identifier, without the namespace prefix. + *

    This matches the table entity ID in Polaris persistence, as defined by {@code + * PolarisEntityCore#getId()}. The table name is not stored since it can change over time; queries + * should resolve names to IDs using the current catalog state. */ - String tableName(); + long tableId(); /** * Timestamp when the report was received. From 7d4212c29b545c4dadc142cf9a6a9a1278947f6b Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 18:04:35 -0800 Subject: [PATCH 09/67] refactor: Remove namespace from MetricsQueryCriteria Per reviewer feedback - since we query by tableId, namespace is implicit. If users want to query by namespace, the service layer should resolve namespace to table IDs using the current catalog state, then query by those IDs. This avoids confusion with table moves over time. Namespace is still stored in MetricsRecordIdentity for display purposes. --- .../persistence/metrics/MetricsQueryCriteria.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index bf41995bfd..a6bf10b952 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -19,7 +19,6 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalLong; @@ -80,19 +79,16 @@ public interface MetricsQueryCriteria { */ OptionalLong catalogId(); - /** - * Namespace to filter by. - * - *

    The namespace is represented as a list of levels to avoid ambiguity when segments contain - * dots. An empty list means no namespace filter is applied. - */ - List namespace(); - /** * Table entity ID to filter by. * *

    This is the internal table entity ID. Callers should resolve table names to IDs before * querying, as table names can change over time. + * + *

    Note: Namespace is intentionally not included as a query filter. Since we query by table ID, + * the namespace is implicit. If users want to query by namespace, the service layer should + * resolve namespace to table IDs using the current catalog state, then query by those IDs. This + * avoids confusion with table moves over time. */ OptionalLong tableId(); From 9ac1f462f657cece7e222782446e579b2389145e Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 23 Jan 2026 16:14:03 -0800 Subject: [PATCH 10/67] Add schema-v4.sql for metrics tables (#3337) Add new schema version 4 with tables for storing scan and commit metrics reports as first-class entities. New tables: - scan_metrics_report: Stores scan metrics with trace correlation - scan_metrics_report_roles: Junction table for principal roles - commit_metrics_report: Stores commit metrics with trace correlation - commit_metrics_report_roles: Junction table for principal roles Key design decisions: - PRIMARY KEY (realm_id, report_id) for multi-tenancy - Junction tables with CASCADE DELETE for roles - Timestamp index for retention cleanup - JSONB metadata column for extensibility (Postgres), TEXT for H2 Schema is self-contained (includes all tables from v1-v3) to support fresh installs. --- .../src/main/resources/h2/schema-v4.sql | 196 +++++++++++++++- .../src/main/resources/postgres/schema-v4.sql | 212 +++++++++++++++++- 2 files changed, 388 insertions(+), 20 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index 0f2ac75cc6..d70da95e85 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -17,13 +17,27 @@ -- under the License. 
-- --- Changes from v2: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency +-- ============================================================================ +-- POLARIS JDBC SCHEMA VERSION 4 (H2) +-- ============================================================================ +-- This schema is SELF-CONTAINED and can be used for fresh installs. +-- Each schema version includes ALL tables, not just incremental changes. +-- +-- Changes from v3: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles +-- ============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET SCHEMA POLARIS_SCHEMA; +-- ============================================================================ +-- VERSION TABLE +-- ============================================================================ CREATE TABLE IF NOT EXISTS version ( version_key VARCHAR PRIMARY KEY, version_value INTEGER NOT NULL @@ -31,11 +45,15 @@ CREATE TABLE IF NOT EXISTS version ( MERGE INTO version (version_key, version_value) KEY (version_key) - VALUES ('version', 3); + VALUES ('version', 4); --- H2 supports COMMENT, but some modes may ignore it COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; +-- ============================================================================ +-- CORE TABLES (from v1) +-- ============================================================================ + +-- Entities table: stores all Polaris entities (catalogs, namespaces, tables, etc.) 
CREATE TABLE IF NOT EXISTS entities ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, @@ -59,12 +77,9 @@ CREATE TABLE IF NOT EXISTS entities ( ); CREATE INDEX IF NOT EXISTS idx_locations ON entities(realm_id, catalog_id, location_without_scheme); - --- TODO: create indexes based on all query pattern. CREATE INDEX IF NOT EXISTS idx_entities ON entities (realm_id, catalog_id, id); COMMENT ON TABLE entities IS 'all the entities'; - COMMENT ON COLUMN entities.catalog_id IS 'catalog id'; COMMENT ON COLUMN entities.id IS 'entity id'; COMMENT ON COLUMN entities.parent_id IS 'entity id of parent'; @@ -80,6 +95,7 @@ COMMENT ON COLUMN entities.properties IS 'entities properties json'; COMMENT ON COLUMN entities.internal_properties IS 'entities internal properties json'; COMMENT ON COLUMN entities.grant_records_version IS 'the version of grant records change on the entity'; +-- Grant records table: stores privilege grants CREATE TABLE IF NOT EXISTS grant_records ( realm_id TEXT NOT NULL, securable_catalog_id BIGINT NOT NULL, @@ -97,6 +113,7 @@ COMMENT ON COLUMN grant_records.grantee_catalog_id IS 'catalog id of the grantee COMMENT ON COLUMN grant_records.grantee_id IS 'id of the grantee'; COMMENT ON COLUMN grant_records.privilege_code IS 'privilege code'; +-- Principal authentication data table CREATE TABLE IF NOT EXISTS principal_authentication_data ( realm_id TEXT NOT NULL, principal_id BIGINT NOT NULL, @@ -109,6 +126,7 @@ CREATE TABLE IF NOT EXISTS principal_authentication_data ( COMMENT ON TABLE principal_authentication_data IS 'authentication data for client'; +-- Policy mapping record table (from v2) CREATE TABLE IF NOT EXISTS policy_mapping_record ( realm_id TEXT NOT NULL, target_catalog_id BIGINT NOT NULL, @@ -122,6 +140,10 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); +-- 
============================================================================ +-- EVENTS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -136,6 +158,10 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); +-- ============================================================================ +-- IDEMPOTENCY RECORDS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS idempotency_records ( realm_id TEXT NOT NULL, idempotency_key TEXT NOT NULL, @@ -160,4 +186,156 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( ); CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires - ON idempotency_records (realm_id, expires_at); \ No newline at end of file + ON idempotency_records (realm_id, expires_at); + +-- ============================================================================ +-- METRICS TABLES (NEW in v4) +-- ============================================================================ + +-- Scan Metrics Report Table +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, + namespace TEXT NOT NULL, + table_name TEXT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + projected_field_ids TEXT, + projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT 
DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; + +-- Indexes for scan_metrics_report +-- Note: Additional indexes for query patterns (by table, trace_id, principal) can be added +-- when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. +CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms); + +-- Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; + +-- Commit Metrics Report Entity Table +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, + namespace TEXT NOT NULL, + table_name TEXT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- 
File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER DEFAULT 1, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; + +-- Indexes for commit_metrics_report +-- Note: Additional indexes for query patterns (by table, trace_id, principal, operation, snapshot) +-- can be added when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. 
+CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 47d4ea8834..2b96a5ca0f 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -1,7 +1,7 @@ -- -- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file-- --- distributed with this work for additional information +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information -- regarding copyright ownership. The ASF licenses this file -- to you under the Apache License, Version 2.0 (the -- "License"). You may not use this file except in compliance @@ -15,24 +15,44 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. +-- --- Changes from v2: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency +-- ============================================================================ +-- POLARIS JDBC SCHEMA VERSION 4 (PostgreSQL) +-- ============================================================================ +-- This schema is SELF-CONTAINED and can be used for fresh installs. 
+-- Each schema version includes ALL tables, not just incremental changes. +-- +-- Changes from v3: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles +-- ============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET search_path TO POLARIS_SCHEMA; +-- ============================================================================ +-- VERSION TABLE +-- ============================================================================ CREATE TABLE IF NOT EXISTS version ( version_key TEXT PRIMARY KEY, version_value INTEGER NOT NULL ); INSERT INTO version (version_key, version_value) -VALUES ('version', 3) +VALUES ('version', 4) ON CONFLICT (version_key) DO UPDATE SET version_value = EXCLUDED.version_value; COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; +-- ============================================================================ +-- CORE TABLES (from v1) +-- ============================================================================ + +-- Entities table: stores all Polaris entities (catalogs, namespaces, tables, etc.) CREATE TABLE IF NOT EXISTS entities ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, @@ -55,14 +75,12 @@ CREATE TABLE IF NOT EXISTS entities ( CONSTRAINT constraint_name UNIQUE (realm_id, catalog_id, parent_id, type_code, name) ); --- TODO: create indexes based on all query pattern. 
CREATE INDEX IF NOT EXISTS idx_entities ON entities (realm_id, catalog_id, id); CREATE INDEX IF NOT EXISTS idx_locations ON entities USING btree (realm_id, parent_id, location_without_scheme) WHERE location_without_scheme IS NOT NULL; COMMENT ON TABLE entities IS 'all the entities'; - COMMENT ON COLUMN entities.realm_id IS 'realm_id used for multi-tenancy'; COMMENT ON COLUMN entities.catalog_id IS 'catalog id'; COMMENT ON COLUMN entities.id IS 'entity id'; @@ -79,6 +97,7 @@ COMMENT ON COLUMN entities.properties IS 'entities properties json'; COMMENT ON COLUMN entities.internal_properties IS 'entities internal properties json'; COMMENT ON COLUMN entities.grant_records_version IS 'the version of grant records change on the entity'; +-- Grant records table: stores privilege grants CREATE TABLE IF NOT EXISTS grant_records ( realm_id TEXT NOT NULL, securable_catalog_id BIGINT NOT NULL, @@ -90,13 +109,13 @@ CREATE TABLE IF NOT EXISTS grant_records ( ); COMMENT ON TABLE grant_records IS 'grant records for entities'; - COMMENT ON COLUMN grant_records.securable_catalog_id IS 'catalog id of the securable'; COMMENT ON COLUMN grant_records.securable_id IS 'entity id of the securable'; COMMENT ON COLUMN grant_records.grantee_catalog_id IS 'catalog id of the grantee'; COMMENT ON COLUMN grant_records.grantee_id IS 'id of the grantee'; COMMENT ON COLUMN grant_records.privilege_code IS 'privilege code'; +-- Principal authentication data table CREATE TABLE IF NOT EXISTS principal_authentication_data ( realm_id TEXT NOT NULL, principal_id BIGINT NOT NULL, @@ -109,6 +128,7 @@ CREATE TABLE IF NOT EXISTS principal_authentication_data ( COMMENT ON TABLE principal_authentication_data IS 'authentication data for client'; +-- Policy mapping record table (from v2) CREATE TABLE IF NOT EXISTS policy_mapping_record ( realm_id TEXT NOT NULL, target_catalog_id BIGINT NOT NULL, @@ -122,6 +142,10 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS 
idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); +-- ============================================================================ +-- EVENTS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -136,7 +160,10 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); --- Idempotency records (key-only idempotency; durable replay) +-- ============================================================================ +-- IDEMPOTENCY RECORDS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS idempotency_records ( realm_id TEXT NOT NULL, idempotency_key TEXT NOT NULL, @@ -160,6 +187,169 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( PRIMARY KEY (realm_id, idempotency_key) ); --- Helpful indexes CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires ON idempotency_records (realm_id, expires_at); + +-- ============================================================================ +-- METRICS TABLES (NEW in v4) +-- ============================================================================ + +-- Scan Metrics Report Table +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, + namespace TEXT NOT NULL, + table_name TEXT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + projected_field_ids TEXT, + projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + 
total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; +COMMENT ON COLUMN scan_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog ID'; +COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; +COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID from report metadata'; + +-- Indexes for scan_metrics_report +-- Note: Additional indexes for query patterns (by table, trace_id, principal) can be added +-- when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. 
+CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp + ON scan_metrics_report(realm_id, timestamp_ms DESC); + +-- Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; + + +-- Commit Metrics Report Entity Table +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, + namespace TEXT NOT NULL, + table_name TEXT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER DEFAULT 1, + + 
-- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; +COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; +COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; + +-- Indexes for commit_metrics_report +-- Note: Additional indexes for query patterns (by table, trace_id, principal, operation, snapshot) +-- can be added when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. +CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp + ON commit_metrics_report(realm_id, timestamp_ms DESC); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; From de341e6e01edd9ac1801afb98ada928cd9b036d1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 26 Jan 2026 16:10:53 -0800 Subject: [PATCH 11/67] Merged main --- .../src/main/resources/h2/schema-v4.sql | 80 +++++++++++-------- .../src/main/resources/postgres/schema-v4.sql | 32 +++----- 2 files changed, 59 insertions(+), 53 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index d70da95e85..d1fc4dfee3 100644 --- 
a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -17,12 +17,6 @@ -- under the License. -- --- ============================================================================ --- POLARIS JDBC SCHEMA VERSION 4 (H2) --- ============================================================================ --- This schema is SELF-CONTAINED and can be used for fresh installs. --- Each schema version includes ALL tables, not just incremental changes. --- -- Changes from v3: -- * Added `events` table -- * Added `idempotency_records` table for REST idempotency @@ -35,9 +29,6 @@ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET SCHEMA POLARIS_SCHEMA; --- ============================================================================ --- VERSION TABLE --- ============================================================================ CREATE TABLE IF NOT EXISTS version ( version_key VARCHAR PRIMARY KEY, version_value INTEGER NOT NULL @@ -47,13 +38,9 @@ MERGE INTO version (version_key, version_value) KEY (version_key) VALUES ('version', 4); +-- H2 supports COMMENT, but some modes may ignore it COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; --- ============================================================================ --- CORE TABLES (from v1) --- ============================================================================ - --- Entities table: stores all Polaris entities (catalogs, namespaces, tables, etc.) CREATE TABLE IF NOT EXISTS entities ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, @@ -77,9 +64,12 @@ CREATE TABLE IF NOT EXISTS entities ( ); CREATE INDEX IF NOT EXISTS idx_locations ON entities(realm_id, catalog_id, location_without_scheme); + +-- TODO: create indexes based on all query pattern. 
CREATE INDEX IF NOT EXISTS idx_entities ON entities (realm_id, catalog_id, id); COMMENT ON TABLE entities IS 'all the entities'; + COMMENT ON COLUMN entities.catalog_id IS 'catalog id'; COMMENT ON COLUMN entities.id IS 'entity id'; COMMENT ON COLUMN entities.parent_id IS 'entity id of parent'; @@ -95,7 +85,6 @@ COMMENT ON COLUMN entities.properties IS 'entities properties json'; COMMENT ON COLUMN entities.internal_properties IS 'entities internal properties json'; COMMENT ON COLUMN entities.grant_records_version IS 'the version of grant records change on the entity'; --- Grant records table: stores privilege grants CREATE TABLE IF NOT EXISTS grant_records ( realm_id TEXT NOT NULL, securable_catalog_id BIGINT NOT NULL, @@ -113,7 +102,6 @@ COMMENT ON COLUMN grant_records.grantee_catalog_id IS 'catalog id of the grantee COMMENT ON COLUMN grant_records.grantee_id IS 'id of the grantee'; COMMENT ON COLUMN grant_records.privilege_code IS 'privilege code'; --- Principal authentication data table CREATE TABLE IF NOT EXISTS principal_authentication_data ( realm_id TEXT NOT NULL, principal_id BIGINT NOT NULL, @@ -126,7 +114,6 @@ CREATE TABLE IF NOT EXISTS principal_authentication_data ( COMMENT ON TABLE principal_authentication_data IS 'authentication data for client'; --- Policy mapping record table (from v2) CREATE TABLE IF NOT EXISTS policy_mapping_record ( realm_id TEXT NOT NULL, target_catalog_id BIGINT NOT NULL, @@ -158,6 +145,32 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); +CREATE TABLE IF NOT EXISTS idempotency_records ( + realm_id TEXT NOT NULL, + idempotency_key TEXT NOT NULL, + operation_type TEXT NOT NULL, + resource_id TEXT NOT NULL, -- normalized request-derived resource identifier (not a generated entity id) + + -- Finalization/replay + http_status INTEGER, -- NULL while IN_PROGRESS; set only on finalized 2xx/terminal 4xx + error_subtype TEXT, -- optional: e.g., already_exists, namespace_not_empty, idempotency_replay_failed + 
response_summary TEXT, -- minimal body to reproduce equivalent response (JSON string) + response_headers TEXT, -- small whitelisted headers to replay (JSON string) + finalized_at TIMESTAMP, -- when http_status was written + + -- Liveness/ops + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL, + heartbeat_at TIMESTAMP, -- updated by owner while IN_PROGRESS + executor_id TEXT, -- owner pod/worker id + expires_at TIMESTAMP, + + PRIMARY KEY (realm_id, idempotency_key) +); + +CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires + ON idempotency_records (realm_id, expires_at); + -- ============================================================================ -- IDEMPOTENCY RECORDS TABLE (NEW in v4) -- ============================================================================ @@ -200,24 +213,24 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, - + -- Report metadata timestamp_ms BIGINT NOT NULL, principal_name TEXT, request_id TEXT, - + -- Trace correlation otel_trace_id TEXT, otel_span_id TEXT, report_trace_id TEXT, - + -- Scan context snapshot_id BIGINT, schema_id INTEGER, filter_expression TEXT, projected_field_ids TEXT, projected_field_names TEXT, - + -- Scan metrics result_data_files BIGINT DEFAULT 0, result_delete_files BIGINT DEFAULT 0, @@ -231,13 +244,13 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( skipped_data_files BIGINT DEFAULT 0, skipped_delete_files BIGINT DEFAULT 0, total_planning_duration_ms BIGINT DEFAULT 0, - + -- Equality/positional delete metrics equality_delete_files BIGINT DEFAULT 0, positional_delete_files BIGINT DEFAULT 0, indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, - + -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', @@ -262,6 +275,7 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics 
reports'; + -- Commit Metrics Report Entity Table CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, @@ -270,22 +284,22 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, - + -- Report metadata timestamp_ms BIGINT NOT NULL, principal_name TEXT, request_id TEXT, - + -- Trace correlation otel_trace_id TEXT, otel_span_id TEXT, report_trace_id TEXT, - + -- Commit context snapshot_id BIGINT NOT NULL, sequence_number BIGINT, operation TEXT NOT NULL, - + -- File metrics added_data_files BIGINT DEFAULT 0, removed_data_files BIGINT DEFAULT 0, @@ -293,29 +307,29 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( added_delete_files BIGINT DEFAULT 0, removed_delete_files BIGINT DEFAULT 0, total_delete_files BIGINT DEFAULT 0, - + -- Equality delete files added_equality_delete_files BIGINT DEFAULT 0, removed_equality_delete_files BIGINT DEFAULT 0, - + -- Positional delete files added_positional_delete_files BIGINT DEFAULT 0, removed_positional_delete_files BIGINT DEFAULT 0, - + -- Record metrics added_records BIGINT DEFAULT 0, removed_records BIGINT DEFAULT 0, total_records BIGINT DEFAULT 0, - + -- Size metrics added_file_size_bytes BIGINT DEFAULT 0, removed_file_size_bytes BIGINT DEFAULT 0, total_file_size_bytes BIGINT DEFAULT 0, - + -- Duration and attempts total_duration_ms BIGINT DEFAULT 0, attempts INTEGER DEFAULT 1, - + -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 2b96a5ca0f..15ca4b58a1 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -1,7 +1,7 @@ -- -- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. 
See the NOTICE file --- distributed with this work for additional information +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information -- regarding copyright ownership. The ASF licenses this file -- to you under the Apache License, Version 2.0 (the -- "License"). You may not use this file except in compliance @@ -15,7 +15,6 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. --- -- ============================================================================ -- POLARIS JDBC SCHEMA VERSION 4 (PostgreSQL) @@ -35,9 +34,6 @@ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET search_path TO POLARIS_SCHEMA; --- ============================================================================ --- VERSION TABLE --- ============================================================================ CREATE TABLE IF NOT EXISTS version ( version_key TEXT PRIMARY KEY, version_value INTEGER NOT NULL @@ -48,11 +44,6 @@ ON CONFLICT (version_key) DO UPDATE SET version_value = EXCLUDED.version_value; COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; --- ============================================================================ --- CORE TABLES (from v1) --- ============================================================================ - --- Entities table: stores all Polaris entities (catalogs, namespaces, tables, etc.) CREATE TABLE IF NOT EXISTS entities ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, @@ -75,12 +66,14 @@ CREATE TABLE IF NOT EXISTS entities ( CONSTRAINT constraint_name UNIQUE (realm_id, catalog_id, parent_id, type_code, name) ); +-- TODO: create indexes based on all query pattern.
CREATE INDEX IF NOT EXISTS idx_entities ON entities (realm_id, catalog_id, id); CREATE INDEX IF NOT EXISTS idx_locations ON entities USING btree (realm_id, parent_id, location_without_scheme) WHERE location_without_scheme IS NOT NULL; COMMENT ON TABLE entities IS 'all the entities'; + COMMENT ON COLUMN entities.realm_id IS 'realm_id used for multi-tenancy'; COMMENT ON COLUMN entities.catalog_id IS 'catalog id'; COMMENT ON COLUMN entities.id IS 'entity id'; @@ -97,7 +90,6 @@ COMMENT ON COLUMN entities.properties IS 'entities properties json'; COMMENT ON COLUMN entities.internal_properties IS 'entities internal properties json'; COMMENT ON COLUMN entities.grant_records_version IS 'the version of grant records change on the entity'; --- Grant records table: stores privilege grants CREATE TABLE IF NOT EXISTS grant_records ( realm_id TEXT NOT NULL, securable_catalog_id BIGINT NOT NULL, @@ -109,13 +101,13 @@ CREATE TABLE IF NOT EXISTS grant_records ( ); COMMENT ON TABLE grant_records IS 'grant records for entities'; + COMMENT ON COLUMN grant_records.securable_catalog_id IS 'catalog id of the securable'; COMMENT ON COLUMN grant_records.securable_id IS 'entity id of the securable'; COMMENT ON COLUMN grant_records.grantee_catalog_id IS 'catalog id of the grantee'; COMMENT ON COLUMN grant_records.grantee_id IS 'id of the grantee'; COMMENT ON COLUMN grant_records.privilege_code IS 'privilege code'; --- Principal authentication data table CREATE TABLE IF NOT EXISTS principal_authentication_data ( realm_id TEXT NOT NULL, principal_id BIGINT NOT NULL, @@ -128,7 +120,6 @@ CREATE TABLE IF NOT EXISTS principal_authentication_data ( COMMENT ON TABLE principal_authentication_data IS 'authentication data for client'; --- Policy mapping record table (from v2) CREATE TABLE IF NOT EXISTS policy_mapping_record ( realm_id TEXT NOT NULL, target_catalog_id BIGINT NOT NULL, @@ -187,6 +178,7 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( PRIMARY KEY (realm_id, idempotency_key) ); +-- 
Helpful indexes CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires ON idempotency_records (realm_id, expires_at); @@ -202,24 +194,24 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, - + -- Report metadata timestamp_ms BIGINT NOT NULL, principal_name TEXT, request_id TEXT, - + -- Trace correlation otel_trace_id TEXT, otel_span_id TEXT, report_trace_id TEXT, - + -- Scan context snapshot_id BIGINT, schema_id INTEGER, filter_expression TEXT, projected_field_ids TEXT, projected_field_names TEXT, - + -- Scan metrics result_data_files BIGINT DEFAULT 0, result_delete_files BIGINT DEFAULT 0, @@ -233,13 +225,13 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( skipped_data_files BIGINT DEFAULT 0, skipped_delete_files BIGINT DEFAULT 0, total_planning_duration_ms BIGINT DEFAULT 0, - + -- Equality/positional delete metrics equality_delete_files BIGINT DEFAULT 0, positional_delete_files BIGINT DEFAULT 0, indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, - + -- Additional metadata (for extensibility) metadata JSONB DEFAULT '{}'::JSONB, From 9260dda0600a155ccc3b0ade5a44209224e7d842 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:05:34 -0800 Subject: [PATCH 12/67] refactor: Update metrics schema to match SPI changes - Change catalog_id from TEXT to BIGINT to match SPI's long catalogId() - Remove catalog_name column (no longer stored per reviewer feedback) - Keep namespace as TEXT (persistence layer serializes List) --- .../relational-jdbc/src/main/resources/h2/schema-v4.sql | 6 ++---- .../src/main/resources/postgres/schema-v4.sql | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index d1fc4dfee3..7e96d2d094 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql 
+++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -209,8 +209,7 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, @@ -280,8 +279,7 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 15ca4b58a1..1eb43667a8 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -190,8 +190,7 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, @@ -267,8 +266,7 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, table_name TEXT NOT NULL, From 472f6f3f18f1b4c808e50006b9f2a51c4f530067 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:29:58 -0800 Subject: [PATCH 13/67] refactor: Change table_name to table_id in metrics schema Per SPI changes - using table entity ID instead of table name since names 
can change over time (via rename operations). --- .../relational-jdbc/src/main/resources/h2/schema-v4.sql | 4 ++-- .../relational-jdbc/src/main/resources/postgres/schema-v4.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index 7e96d2d094..fd2bc29b50 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -211,7 +211,7 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -281,7 +281,7 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 1eb43667a8..530fc969d9 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -192,7 +192,7 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -268,7 +268,7 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, From b7925c88bd3d7a32fc72e0aa38c7b8a4e11e643c Mon Sep 17 00:00:00 2001 From: Anand Kumar 
Sankaran Date: Fri, 16 Jan 2026 17:35:56 -0800 Subject: [PATCH 14/67] feat(events): Enable metrics event emission in reportMetrics() This commit adds the core infrastructure for emitting metrics as events when reportMetrics() is called on the Iceberg REST catalog API. Changes: - Add REPORT_METRICS_REQUEST attribute to EventAttributes.java - Add BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS to PolarisEventType.java - Update reportMetrics() in IcebergRestCatalogEventServiceDelegator.java to emit BEFORE/AFTER events with catalog name, namespace, table, and request - Add ReportMetricsEventTest.java with unit tests verifying event emission This enables event listeners to receive metrics report events, allowing for use cases like audit logging and metrics persistence. Added tests and a Feature flag --- CHANGELOG.md | 1 + .../core/config/FeatureConfiguration.java | 19 ++ ...ebergRestCatalogEventServiceDelegator.java | 44 +++- .../service/events/EventAttributes.java | 5 + .../service/events/PolarisEventType.java | 6 +- .../iceberg/ReportMetricsEventTest.java | 238 ++++++++++++++++++ .../apache/polaris/service/TestServices.java | 3 +- .../listeners/TestPolarisEventListener.java | 13 + 8 files changed, 324 insertions(+), 5 deletions(-) create mode 100644 runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ef7108a6..5d545a4839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,6 +97,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti ### New Features +- Added `ENABLE_METRICS_EVENT_EMISSION` feature flag (default: false) to control the emission of `BEFORE_REPORT_METRICS` and `AFTER_REPORT_METRICS` events when the Iceberg REST catalog API's `reportMetrics()` method is called. When enabled, event listeners can receive metrics report data for use cases like audit logging and metrics persistence. 
Can be configured via `polaris.features."ENABLE_METRICS_EVENT_EMISSION"=true`. - Added `--no-sts` flag to CLI to support S3-compatible storage systems that do not have Security Token Service available. - Support credential vending for federated catalogs. `ALLOW_FEDERATED_CATALOGS_CREDENTIAL_VENDING` (default: true) was added to toggle this feature. - Enhanced catalog federation with SigV4 authentication support, additional authentication types for credential vending, and location-based access restrictions to block credential vending for remote tables outside allowed location lists. diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java index d9fe3d5641..90d5e16afa 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java @@ -540,4 +540,23 @@ public static void enforceFeatureEnabledOrThrow( .description("Metadata batch size for tasks that clean up dropped tables' files.") .defaultValue(10) .buildFeatureConfiguration(); + + /** + * Feature flag to control the emission of BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events + * when the Iceberg REST catalog API's reportMetrics() method is called. When disabled (default), + * the reportMetrics() method calls the delegate directly without emitting any events. When + * enabled, BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events are emitted, allowing event + * listeners to receive metrics report data for use cases like audit logging and metrics + * persistence. + */ + public static final FeatureConfiguration ENABLE_METRICS_EVENT_EMISSION = + PolarisConfiguration.builder() + .key("ENABLE_METRICS_EVENT_EMISSION") + .description( + "If set to true, emit BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events when " + + "the reportMetrics() API is called. 
This enables event listeners to receive " + + "metrics report data for use cases like audit logging and metrics persistence. " + + "Defaults to false to ensure backward compatibility.") + .defaultValue(false) + .buildFeatureConfiguration(); } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index 00d0f6e08f..25179b8b25 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -43,6 +43,8 @@ import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; +import org.apache.polaris.core.config.FeatureConfiguration; +import org.apache.polaris.core.config.RealmConfig; import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.service.catalog.CatalogPrefixParser; import org.apache.polaris.service.catalog.api.IcebergRestCatalogApiService; @@ -67,6 +69,7 @@ public class IcebergRestCatalogEventServiceDelegator @Inject PolarisEventMetadataFactory eventMetadataFactory; @Inject CatalogPrefixParser prefixParser; @Inject EventAttributeMap eventAttributeMap; + @Inject RealmConfig realmConfig; // Constructor for testing - allows manual dependency injection @VisibleForTesting @@ -75,12 +78,14 @@ public IcebergRestCatalogEventServiceDelegator( PolarisEventListener polarisEventListener, PolarisEventMetadataFactory eventMetadataFactory, CatalogPrefixParser prefixParser, - EventAttributeMap eventAttributeMap) { + EventAttributeMap eventAttributeMap, + RealmConfig realmConfig) { this.delegate = delegate; this.polarisEventListener = 
polarisEventListener; this.eventMetadataFactory = eventMetadataFactory; this.prefixParser = prefixParser; this.eventAttributeMap = eventAttributeMap; + this.realmConfig = realmConfig; } // Default constructor for CDI @@ -821,8 +826,41 @@ public Response reportMetrics( ReportMetricsRequest reportMetricsRequest, RealmContext realmContext, SecurityContext securityContext) { - return delegate.reportMetrics( - prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); + // Check if metrics event emission is enabled + boolean metricsEventEmissionEnabled = + realmConfig.getConfig(FeatureConfiguration.ENABLE_METRICS_EVENT_EMISSION); + + // If metrics event emission is disabled, call delegate directly without emitting events + if (!metricsEventEmissionEnabled) { + return delegate.reportMetrics( + prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); + } + + // Emit events when feature is enabled + String catalogName = prefixParser.prefixToCatalogName(realmContext, prefix); + Namespace namespaceObj = decodeNamespace(namespace); + polarisEventListener.onEvent( + new PolarisEvent( + PolarisEventType.BEFORE_REPORT_METRICS, + eventMetadataFactory.create(), + new AttributeMap() + .put(EventAttributes.CATALOG_NAME, catalogName) + .put(EventAttributes.NAMESPACE, namespaceObj) + .put(EventAttributes.TABLE_NAME, table) + .put(EventAttributes.REPORT_METRICS_REQUEST, reportMetricsRequest))); + Response resp = + delegate.reportMetrics( + prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); + polarisEventListener.onEvent( + new PolarisEvent( + PolarisEventType.AFTER_REPORT_METRICS, + eventMetadataFactory.create(), + new AttributeMap() + .put(EventAttributes.CATALOG_NAME, catalogName) + .put(EventAttributes.NAMESPACE, namespaceObj) + .put(EventAttributes.TABLE_NAME, table) + .put(EventAttributes.REPORT_METRICS_REQUEST, reportMetricsRequest))); + return resp; } @Override diff --git 
a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java index 138133d62a..7638c4742d 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java @@ -30,6 +30,7 @@ import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; +import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.ConfigResponse; @@ -237,4 +238,8 @@ private EventAttributes() {} new AttributeKey<>("detach_policy_request", DetachPolicyRequest.class); public static final AttributeKey GET_APPLICABLE_POLICIES_RESPONSE = new AttributeKey<>("get_applicable_policies_response", GetApplicablePoliciesResponse.class); + + // Metrics reporting attributes + public static final AttributeKey REPORT_METRICS_REQUEST = + new AttributeKey<>("report_metrics_request", ReportMetricsRequest.class); } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java b/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java index de6b994608..a11d047b89 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java @@ -193,7 +193,11 @@ public enum PolarisEventType { AFTER_ATTEMPT_TASK(1401), // Rate Limiting Events - BEFORE_LIMIT_REQUEST_RATE(1500); + BEFORE_LIMIT_REQUEST_RATE(1500), + + // Metrics Reporting Events + BEFORE_REPORT_METRICS(1600), + AFTER_REPORT_METRICS(1601); private final int code; 
diff --git a/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java b/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java new file mode 100644 index 0000000000..7522347f28 --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.catalog.iceberg; + +import static org.apache.polaris.service.admin.PolarisAuthzTestBase.SCHEMA; +import static org.assertj.core.api.Assertions.assertThat; + +import jakarta.ws.rs.core.Response; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.metrics.ImmutableScanReport; +import org.apache.iceberg.metrics.ScanMetrics; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.rest.requests.CreateNamespaceRequest; +import org.apache.iceberg.rest.requests.CreateTableRequest; +import org.apache.iceberg.rest.requests.ReportMetricsRequest; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogProperties; +import org.apache.polaris.core.admin.model.CreateCatalogRequest; +import org.apache.polaris.core.admin.model.FileStorageConfigInfo; +import org.apache.polaris.core.admin.model.StorageConfigInfo; +import org.apache.polaris.service.TestServices; +import org.apache.polaris.service.events.EventAttributes; +import org.apache.polaris.service.events.PolarisEvent; +import org.apache.polaris.service.events.PolarisEventType; +import org.apache.polaris.service.events.listeners.TestPolarisEventListener; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Unit tests for verifying that reportMetrics() emits BEFORE_REPORT_METRICS and + * AFTER_REPORT_METRICS events. 
+ */ +public class ReportMetricsEventTest { + private static final String NAMESPACE = "test_ns"; + private static final String CATALOG = "test-catalog"; + private static final String TABLE = "test-table"; + + private String catalogLocation; + + @BeforeEach + public void setUp(@TempDir Path tempDir) { + catalogLocation = tempDir.toAbsolutePath().toUri().toString(); + if (catalogLocation.endsWith("/")) { + catalogLocation = catalogLocation.substring(0, catalogLocation.length() - 1); + } + } + + @Test + void testReportMetricsEmitsBeforeAndAfterEventsWhenEnabled() { + // Create test services with ENABLE_METRICS_EVENT_EMISSION enabled + TestServices testServices = createTestServicesWithMetricsEmissionEnabled(true); + createCatalogAndNamespace(testServices); + createTable(testServices, TABLE); + + // Create a ScanReport for testing + ImmutableScanReport scanReport = + ImmutableScanReport.builder() + .schemaId(0) + .tableName(NAMESPACE + "." + TABLE) + .snapshotId(100L) + .addProjectedFieldIds(1) + .addProjectedFieldNames("id") + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + .build(); + + ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); + + // Call reportMetrics + try (Response response = + testServices + .restApi() + .reportMetrics( + CATALOG, + NAMESPACE, + TABLE, + request, + testServices.realmContext(), + testServices.securityContext())) { + assertThat(response.getStatus()).isEqualTo(Response.Status.NO_CONTENT.getStatusCode()); + } + + // Verify that BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events were emitted + TestPolarisEventListener testEventListener = + (TestPolarisEventListener) testServices.polarisEventListener(); + + PolarisEvent beforeEvent = testEventListener.getLatest(PolarisEventType.BEFORE_REPORT_METRICS); + assertThat(beforeEvent).isNotNull(); + assertThat(beforeEvent.attributes().getRequired(EventAttributes.CATALOG_NAME)) + .isEqualTo(CATALOG); + 
assertThat(beforeEvent.attributes().getRequired(EventAttributes.NAMESPACE)) + .isEqualTo(Namespace.of(NAMESPACE)); + assertThat(beforeEvent.attributes().getRequired(EventAttributes.TABLE_NAME)).isEqualTo(TABLE); + assertThat(beforeEvent.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST)) + .isNotNull(); + + PolarisEvent afterEvent = testEventListener.getLatest(PolarisEventType.AFTER_REPORT_METRICS); + assertThat(afterEvent).isNotNull(); + assertThat(afterEvent.attributes().getRequired(EventAttributes.CATALOG_NAME)) + .isEqualTo(CATALOG); + assertThat(afterEvent.attributes().getRequired(EventAttributes.NAMESPACE)) + .isEqualTo(Namespace.of(NAMESPACE)); + assertThat(afterEvent.attributes().getRequired(EventAttributes.TABLE_NAME)).isEqualTo(TABLE); + assertThat(afterEvent.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST)) + .isNotNull(); + } + + @Test + void testReportMetricsDoesNotEmitEventsWhenDisabled() { + // Create test services with ENABLE_METRICS_EVENT_EMISSION disabled (default) + TestServices testServices = createTestServicesWithMetricsEmissionEnabled(false); + createCatalogAndNamespace(testServices); + createTable(testServices, TABLE); + + // Create a ScanReport for testing + ImmutableScanReport scanReport = + ImmutableScanReport.builder() + .schemaId(0) + .tableName(NAMESPACE + "." 
+ TABLE) + .snapshotId(100L) + .addProjectedFieldIds(1) + .addProjectedFieldNames("id") + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + .build(); + + ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); + + // Call reportMetrics + try (Response response = + testServices + .restApi() + .reportMetrics( + CATALOG, + NAMESPACE, + TABLE, + request, + testServices.realmContext(), + testServices.securityContext())) { + assertThat(response.getStatus()).isEqualTo(Response.Status.NO_CONTENT.getStatusCode()); + } + + // Verify that BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events were NOT emitted + TestPolarisEventListener testEventListener = + (TestPolarisEventListener) testServices.polarisEventListener(); + + assertThat(testEventListener.hasEvent(PolarisEventType.BEFORE_REPORT_METRICS)).isFalse(); + assertThat(testEventListener.hasEvent(PolarisEventType.AFTER_REPORT_METRICS)).isFalse(); + } + + private TestServices createTestServicesWithMetricsEmissionEnabled(boolean enabled) { + Map config = + Map.of( + "ALLOW_INSECURE_STORAGE_TYPES", + "true", + "SUPPORTED_CATALOG_STORAGE_TYPES", + List.of("FILE"), + "ENABLE_METRICS_EVENT_EMISSION", + String.valueOf(enabled)); + return TestServices.builder().config(config).withEventDelegator(true).build(); + } + + private void createCatalogAndNamespace(TestServices services) { + CatalogProperties.Builder propertiesBuilder = + CatalogProperties.builder() + .setDefaultBaseLocation(String.format("%s/%s", catalogLocation, CATALOG)); + + StorageConfigInfo config = + FileStorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) + .build(); + Catalog catalogObject = + new Catalog( + Catalog.TypeEnum.INTERNAL, CATALOG, propertiesBuilder.build(), 0L, 0L, 1, config); + try (Response response = + services + .catalogsApi() + .createCatalog( + new CreateCatalogRequest(catalogObject), + services.realmContext(), + services.securityContext())) { + 
assertThat(response.getStatus()).isEqualTo(Response.Status.CREATED.getStatusCode()); + } + + CreateNamespaceRequest createNamespaceRequest = + CreateNamespaceRequest.builder().withNamespace(Namespace.of(NAMESPACE)).build(); + try (Response response = + services + .restApi() + .createNamespace( + CATALOG, + createNamespaceRequest, + services.realmContext(), + services.securityContext())) { + assertThat(response.getStatus()).isEqualTo(Response.Status.OK.getStatusCode()); + } + } + + private void createTable(TestServices services, String tableName) { + CreateTableRequest createTableRequest = + CreateTableRequest.builder() + .withName(tableName) + .withLocation( + String.format("%s/%s/%s/%s", catalogLocation, CATALOG, NAMESPACE, tableName)) + .withSchema(SCHEMA) + .build(); + services + .restApi() + .createTable( + CATALOG, + NAMESPACE, + createTableRequest, + null, + services.realmContext(), + services.securityContext()); + } +} diff --git a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java index 7b4e5c2dda..40fca8c2c3 100644 --- a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java @@ -366,7 +366,8 @@ public String getAuthenticationScheme() { polarisEventListener, eventMetadataFactory, new DefaultCatalogPrefixParser(), - eventAttributeMap); + eventAttributeMap, + realmConfig); finalRestConfigurationService = new IcebergRestConfigurationEventServiceDelegator( catalogService, polarisEventListener, eventMetadataFactory); diff --git a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java index e23a7a9264..a0973750b5 100644 --- 
a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java @@ -43,4 +43,17 @@ public PolarisEvent getLatest(PolarisEventType type) { } return latest; } + + /** + * Returns the latest event of the specified type, or null if no such event has been recorded. + * This is useful for tests that need to verify no event was emitted. + */ + public PolarisEvent getLatestOrNull(PolarisEventType type) { + return latestEvents.get(type); + } + + /** Returns true if an event of the specified type has been recorded. */ + public boolean hasEvent(PolarisEventType type) { + return latestEvents.containsKey(type); + } } From 6d5f2aa966059b84ecd926ecfb8153d96997bb68 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 17:57:22 -0800 Subject: [PATCH 15/67] feat(events): Add metrics persistence handler for AFTER_REPORT_METRICS events This commit extends the PolarisPersistenceEventListener to handle AFTER_REPORT_METRICS events, enabling persistence of metrics report data. Changes: - Add handleAfterReportMetrics() method to PolarisPersistenceEventListener.java that extracts ScanReport and CommitReport data from metrics events - Add new imports for CommitReport, MetricsReport, ScanReport, and ReportMetricsRequest - Handle null values gracefully in metadata extraction (null-safety) - Add PolarisPersistenceEventListenerTest.java with comprehensive unit tests for null-safety and metadata extraction scenarios This builds on the metrics event emission infrastructure added in the previous commit and enables event listeners to persist metrics data. 
--- .../PolarisPersistenceEventListener.java | 60 +++++ .../PolarisPersistenceEventListenerTest.java | 212 ++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java b/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java index e073e93447..4f6105313c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java @@ -25,6 +25,10 @@ import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.polaris.core.admin.model.Catalog; import org.apache.polaris.core.auth.PolarisPrincipal; @@ -38,6 +42,7 @@ public void onEvent(PolarisEvent event) { switch (event.type()) { case AFTER_CREATE_TABLE -> handleAfterCreateTable(event); case AFTER_CREATE_CATALOG -> handleAfterCreateCatalog(event); + case AFTER_REPORT_METRICS -> handleAfterReportMetrics(event); default -> { // Other events not handled by this listener } @@ -90,6 +95,61 @@ private void handleAfterCreateCatalog(PolarisEvent event) { processEvent(event.metadata().realmId(), polarisEvent); } + private void handleAfterReportMetrics(PolarisEvent event) { + ReportMetricsRequest request = + event.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST); + String catalogName = 
event.attributes().getRequired(EventAttributes.CATALOG_NAME); + Namespace namespace = event.attributes().getRequired(EventAttributes.NAMESPACE); + String tableName = event.attributes().getRequired(EventAttributes.TABLE_NAME); + + org.apache.polaris.core.entity.PolarisEvent polarisEvent = + new org.apache.polaris.core.entity.PolarisEvent( + catalogName, + event.metadata().eventId().toString(), + event.metadata().requestId().orElse(null), + event.type().name(), + event.metadata().timestamp().toEpochMilli(), + event.metadata().user().map(PolarisPrincipal::getName).orElse(null), + org.apache.polaris.core.entity.PolarisEvent.ResourceType.TABLE, + TableIdentifier.of(namespace, tableName).toString()); + + var additionalParameters = ImmutableMap.builder(); + MetricsReport report = request.report(); + if (report instanceof ScanReport scanReport) { + additionalParameters.put("report_type", "scan"); + additionalParameters.put("snapshot_id", String.valueOf(scanReport.snapshotId())); + additionalParameters.put("schema_id", String.valueOf(scanReport.schemaId())); + Map metadata = scanReport.metadata(); + if (metadata != null) { + metadata.forEach( + (key, value) -> { + if (value != null) { + additionalParameters.put("report." + key, value); + } + }); + } + } else if (report instanceof CommitReport commitReport) { + additionalParameters.put("report_type", "commit"); + additionalParameters.put("snapshot_id", String.valueOf(commitReport.snapshotId())); + additionalParameters.put("sequence_number", String.valueOf(commitReport.sequenceNumber())); + if (commitReport.operation() != null) { + additionalParameters.put("operation", commitReport.operation()); + } + Map metadata = commitReport.metadata(); + if (metadata != null) { + metadata.forEach( + (key, value) -> { + if (value != null) { + additionalParameters.put("report." 
+ key, value); + } + }); + } + } + additionalParameters.putAll(event.metadata().openTelemetryContext()); + polarisEvent.setAdditionalProperties(additionalParameters.build()); + processEvent(event.metadata().realmId(), polarisEvent); + } + protected abstract void processEvent( String realmId, org.apache.polaris.core.entity.PolarisEvent event); } diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java new file mode 100644 index 0000000000..290f09ec8f --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.events.listeners; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.ImmutableScanReport; +import org.apache.iceberg.metrics.ScanMetrics; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.rest.requests.ReportMetricsRequest; +import org.apache.polaris.service.events.AttributeMap; +import org.apache.polaris.service.events.EventAttributes; +import org.apache.polaris.service.events.ImmutablePolarisEventMetadata; +import org.apache.polaris.service.events.PolarisEvent; +import org.apache.polaris.service.events.PolarisEventMetadata; +import org.apache.polaris.service.events.PolarisEventType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@link PolarisPersistenceEventListener} focusing on null-safety in metrics + * extraction. 
+ */ +class PolarisPersistenceEventListenerTest { + + private TestPolarisPersistenceEventListener listener; + + @BeforeEach + void setUp() { + listener = new TestPolarisPersistenceEventListener(); + } + + @Test + void testScanReportWithNullMetadataValues() { + // Use mocks to simulate a ScanReport with null values in metadata + // (Iceberg's ImmutableScanReport.Builder doesn't allow nulls, but JSON deserialization might) + Map metadataWithNull = new HashMap<>(); + metadataWithNull.put("trace-id", "valid-trace-id"); + metadataWithNull.put("null-value-key", null); + + ScanReport mockScanReport = mock(ScanReport.class); + when(mockScanReport.snapshotId()).thenReturn(123L); + when(mockScanReport.schemaId()).thenReturn(0); + when(mockScanReport.metadata()).thenReturn(metadataWithNull); + when(mockScanReport.scanMetrics()).thenReturn(null); + + ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); + when(mockRequest.report()).thenReturn(mockScanReport); + + PolarisEvent event = createAfterReportMetricsEvent(mockRequest); + + // Should not throw NPE + assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); + + // Verify the valid metadata entry was captured, nulls were skipped + org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); + assertThat(persistedEvent).isNotNull(); + Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps).containsEntry("report.trace-id", "valid-trace-id"); + assertThat(additionalProps).doesNotContainKey("report.null-value-key"); + } + + @Test + void testCommitReportWithNullOperation() { + // Use mock to simulate a CommitReport with null operation + CommitReport mockCommitReport = mock(CommitReport.class); + when(mockCommitReport.snapshotId()).thenReturn(456L); + when(mockCommitReport.sequenceNumber()).thenReturn(1L); + when(mockCommitReport.operation()).thenReturn(null); // null operation + 
when(mockCommitReport.metadata()).thenReturn(ImmutableMap.of()); + when(mockCommitReport.commitMetrics()).thenReturn(null); + + ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); + when(mockRequest.report()).thenReturn(mockCommitReport); + + PolarisEvent event = createAfterReportMetricsEvent(mockRequest); + + // Should not throw NPE + assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); + + // Verify operation is not in additional properties (since it was null) + org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); + assertThat(persistedEvent).isNotNull(); + Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps) + .containsEntry("report_type", "commit") + .containsEntry("snapshot_id", "456") + .doesNotContainKey("operation"); + } + + @Test + void testCommitReportWithNullMetadataValues() { + // Use mock to simulate a CommitReport with null values in metadata + Map metadataWithNull = new HashMap<>(); + metadataWithNull.put("trace-id", "commit-trace-id"); + metadataWithNull.put("null-value-key", null); + + CommitReport mockCommitReport = mock(CommitReport.class); + when(mockCommitReport.snapshotId()).thenReturn(789L); + when(mockCommitReport.sequenceNumber()).thenReturn(2L); + when(mockCommitReport.operation()).thenReturn("append"); + when(mockCommitReport.metadata()).thenReturn(metadataWithNull); + when(mockCommitReport.commitMetrics()).thenReturn(null); + + ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); + when(mockRequest.report()).thenReturn(mockCommitReport); + + PolarisEvent event = createAfterReportMetricsEvent(mockRequest); + + // Should not throw NPE + assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); + + // Verify valid entries are captured, nulls are skipped + org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); + assertThat(persistedEvent).isNotNull(); + Map 
additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps) + .containsEntry("report.trace-id", "commit-trace-id") + .containsEntry("operation", "append") + .doesNotContainKey("report.null-value-key"); + } + + @Test + void testScanReportWithEmptyMetadata() { + ImmutableScanReport scanReport = + ImmutableScanReport.builder() + .schemaId(0) + .tableName("test_ns.test_table") + .snapshotId(100L) + .addProjectedFieldIds(1) + .addProjectedFieldNames("id") + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + // Empty metadata map + .build(); + + ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); + PolarisEvent event = createAfterReportMetricsEvent(request); + + // Should not throw any exception + assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); + + org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); + assertThat(persistedEvent).isNotNull(); + Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps) + .containsEntry("report_type", "scan") + .containsEntry("snapshot_id", "100"); + } + + private PolarisEvent createAfterReportMetricsEvent(ReportMetricsRequest request) { + PolarisEventMetadata metadata = + ImmutablePolarisEventMetadata.builder() + .realmId("test-realm") + .requestId("test-request-id") + .openTelemetryContext(ImmutableMap.of()) + .build(); + + AttributeMap attributes = + new AttributeMap() + .put(EventAttributes.CATALOG_NAME, "test-catalog") + .put(EventAttributes.NAMESPACE, Namespace.of("test_ns")) + .put(EventAttributes.TABLE_NAME, "test_table") + .put(EventAttributes.REPORT_METRICS_REQUEST, request); + + return new PolarisEvent(PolarisEventType.AFTER_REPORT_METRICS, metadata, attributes); + } + + /** Concrete test implementation that captures persisted events for verification. 
*/ + private static class TestPolarisPersistenceEventListener extends PolarisPersistenceEventListener { + private final Map events = + new ConcurrentHashMap<>(); + private org.apache.polaris.core.entity.PolarisEvent lastEvent; + + @Override + protected void processEvent(String realmId, org.apache.polaris.core.entity.PolarisEvent event) { + events.put(event.getId(), event); + lastEvent = event; + } + + public org.apache.polaris.core.entity.PolarisEvent getLastEvent() { + return lastEvent; + } + } +} From 850b3d16c46c4a1de9931e7b6d9cff85f60a19d0 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 19:45:40 -0800 Subject: [PATCH 16/67] feat(persistence): Add database schema and model classes for metrics (PR#3) This commit adds the foundational persistence layer for metrics reports: Database Schema (v4): - Add scan_metrics_report table for storing scan metrics as first-class entities - Add commit_metrics_report table for storing commit metrics - Include indexes for efficient querying by realm, catalog, table, timestamp - Add trace correlation fields (otel_trace_id, otel_span_id, report_trace_id) - Provide both PostgreSQL and H2 schema definitions Model Classes: - ModelScanMetricsReport: Immutable model for scan metrics records - ModelCommitMetricsReport: Immutable model for commit metrics records - MetricsReportConverter: Common conversion utilities - ModelScanMetricsReportConverter: ResultSet to model conversion - ModelCommitMetricsReportConverter: ResultSet to model conversion Tests: - ModelScanMetricsReportTest: Unit tests for scan model - ModelCommitMetricsReportTest: Unit tests for commit model This is part of the metrics persistence feature and provides the data layer that will be used by the JDBC persistence implementation. 
--- .../jdbc/models/MetricsReportConverter.java | 275 ++++++++++++++++ .../jdbc/models/ModelCommitMetricsReport.java | 299 ++++++++++++++++++ .../ModelCommitMetricsReportConverter.java | 79 +++++ .../jdbc/models/ModelScanMetricsReport.java | 299 ++++++++++++++++++ .../ModelScanMetricsReportConverter.java | 76 +++++ .../src/main/resources/h2/schema-v4.sql | 58 +--- .../src/main/resources/postgres/schema-v4.sql | 39 +-- .../models/ModelCommitMetricsReportTest.java | 213 +++++++++++++ .../models/ModelScanMetricsReportTest.java | 212 +++++++++++++ 9 files changed, 1480 insertions(+), 70 deletions(-) create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java create mode 100644 persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java create mode 100644 persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java new file mode 100644 index 0000000000..c9660c2e84 --- /dev/null +++ 
b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.annotation.Nullable; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.CounterResult; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.metrics.TimerResult; + +/** + * Converter utility class for transforming Iceberg metrics reports into persistence model classes. + */ +public final class MetricsReportConverter { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private MetricsReportConverter() { + // Utility class + } + + /** + * Converts an Iceberg ScanReport to a ModelScanMetricsReport. 
+ * + * @param scanReport the Iceberg scan report + * @param realmId the realm ID for multi-tenancy + * @param catalogId the catalog ID + * @param catalogName the catalog name + * @param namespace the namespace (dot-separated) + * @param principalName the principal who initiated the scan (optional) + * @param requestId the request ID (optional) + * @param otelTraceId OpenTelemetry trace ID (optional) + * @param otelSpanId OpenTelemetry span ID (optional) + * @return the converted ModelScanMetricsReport + */ + public static ModelScanMetricsReport fromScanReport( + ScanReport scanReport, + String realmId, + String catalogId, + String catalogName, + String namespace, + @Nullable String principalName, + @Nullable String requestId, + @Nullable String otelTraceId, + @Nullable String otelSpanId) { + + String reportId = UUID.randomUUID().toString(); + long timestampMs = System.currentTimeMillis(); + + ScanMetricsResult metrics = scanReport.scanMetrics(); + + ImmutableModelScanMetricsReport.Builder builder = + ImmutableModelScanMetricsReport.builder() + .reportId(reportId) + .realmId(realmId) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(scanReport.tableName()) + .timestampMs(timestampMs) + .principalName(principalName) + .requestId(requestId) + .otelTraceId(otelTraceId) + .otelSpanId(otelSpanId) + .snapshotId(scanReport.snapshotId()) + .schemaId(scanReport.schemaId()) + .filterExpression(scanReport.filter() != null ? 
scanReport.filter().toString() : null) + .projectedFieldIds(formatIntegerList(scanReport.projectedFieldIds())) + .projectedFieldNames(formatStringList(scanReport.projectedFieldNames())); + + // Extract metrics values + if (metrics != null) { + builder + .resultDataFiles(getCounterValue(metrics.resultDataFiles())) + .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) + .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) + .totalDataManifests(getCounterValue(metrics.totalDataManifests())) + .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) + .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) + .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) + .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) + .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) + .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) + .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) + .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) + .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) + .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) + .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) + .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())); + } else { + builder + .resultDataFiles(0L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(0L) + .totalDataManifests(0L) + .totalDeleteManifests(0L) + .scannedDataManifests(0L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(0L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L); + } + + // Store additional metadata as JSON + Map metadata = scanReport.metadata(); + if (metadata != null && !metadata.isEmpty()) { 
+ builder.metadata(toJson(metadata)); + } + + return builder.build(); + } + + /** + * Converts an Iceberg CommitReport to a ModelCommitMetricsReport. + * + * @param commitReport the Iceberg commit report + * @param realmId the realm ID for multi-tenancy + * @param catalogId the catalog ID + * @param catalogName the catalog name + * @param namespace the namespace (dot-separated) + * @param principalName the principal who initiated the commit (optional) + * @param requestId the request ID (optional) + * @param otelTraceId OpenTelemetry trace ID (optional) + * @param otelSpanId OpenTelemetry span ID (optional) + * @return the converted ModelCommitMetricsReport + */ + public static ModelCommitMetricsReport fromCommitReport( + CommitReport commitReport, + String realmId, + String catalogId, + String catalogName, + String namespace, + @Nullable String principalName, + @Nullable String requestId, + @Nullable String otelTraceId, + @Nullable String otelSpanId) { + + String reportId = UUID.randomUUID().toString(); + long timestampMs = System.currentTimeMillis(); + + CommitMetricsResult metrics = commitReport.commitMetrics(); + + ImmutableModelCommitMetricsReport.Builder builder = + ImmutableModelCommitMetricsReport.builder() + .reportId(reportId) + .realmId(realmId) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(commitReport.tableName()) + .timestampMs(timestampMs) + .principalName(principalName) + .requestId(requestId) + .otelTraceId(otelTraceId) + .otelSpanId(otelSpanId) + .snapshotId(commitReport.snapshotId()) + .sequenceNumber(commitReport.sequenceNumber()) + .operation(commitReport.operation() != null ? 
commitReport.operation() : "UNKNOWN"); + + // Extract metrics values + if (metrics != null) { + builder + .addedDataFiles(getCounterValue(metrics.addedDataFiles())) + .removedDataFiles(getCounterValue(metrics.removedDataFiles())) + .totalDataFiles(getCounterValue(metrics.totalDataFiles())) + .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) + .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) + .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) + .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) + .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) + .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) + .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) + .addedRecords(getCounterValue(metrics.addedRecords())) + .removedRecords(getCounterValue(metrics.removedRecords())) + .totalRecords(getCounterValue(metrics.totalRecords())) + .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) + .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) + .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) + .totalDurationMs(getTimerValueMs(metrics.totalDuration())) + .attempts(getCounterValueInt(metrics.attempts())); + } else { + builder + .addedDataFiles(0L) + .removedDataFiles(0L) + .totalDataFiles(0L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(0L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(0L) + .removedRecords(0L) + .totalRecords(0L) + .addedFileSizeBytes(0L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(0L) + .totalDurationMs(0L) + .attempts(1); + } + + // Store additional metadata as JSON + Map metadata = commitReport.metadata(); + if (metadata != null && !metadata.isEmpty()) { + builder.metadata(toJson(metadata)); + } + 
+ return builder.build(); + } + + private static long getCounterValue(@Nullable CounterResult counter) { + return counter != null ? counter.value() : 0L; + } + + private static int getCounterValueInt(@Nullable CounterResult counter) { + return counter != null ? (int) counter.value() : 1; + } + + private static long getTimerValueMs(@Nullable TimerResult timer) { + return timer != null && timer.totalDuration() != null ? timer.totalDuration().toMillis() : 0L; + } + + private static String formatIntegerList(@Nullable List list) { + if (list == null || list.isEmpty()) { + return null; + } + return list.stream().map(String::valueOf).collect(Collectors.joining(",")); + } + + private static String formatStringList(@Nullable List list) { + if (list == null || list.isEmpty()) { + return null; + } + return String.join(",", list); + } + + private static String toJson(Map map) { + try { + return OBJECT_MAPPER.writeValueAsString(map); + } catch (JsonProcessingException e) { + return "{}"; + } + } +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java new file mode 100644 index 0000000000..a2c7b420c6 --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import jakarta.annotation.Nullable; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; + +/** Model class for commit_metrics_report table - stores commit metrics as first-class entities. */ +@PolarisImmutable +public interface ModelCommitMetricsReport extends Converter { + String TABLE_NAME = "COMMIT_METRICS_REPORT"; + + // Column names + String REPORT_ID = "report_id"; + String REALM_ID = "realm_id"; + String CATALOG_ID = "catalog_id"; + String CATALOG_NAME = "catalog_name"; + String NAMESPACE = "namespace"; + String TABLE_NAME_COL = "table_name"; + String TIMESTAMP_MS = "timestamp_ms"; + String PRINCIPAL_NAME = "principal_name"; + String REQUEST_ID = "request_id"; + String OTEL_TRACE_ID = "otel_trace_id"; + String OTEL_SPAN_ID = "otel_span_id"; + String REPORT_TRACE_ID = "report_trace_id"; + String SNAPSHOT_ID = "snapshot_id"; + String SEQUENCE_NUMBER = "sequence_number"; + String OPERATION = "operation"; + String ADDED_DATA_FILES = "added_data_files"; + String REMOVED_DATA_FILES = "removed_data_files"; + String TOTAL_DATA_FILES = "total_data_files"; + String ADDED_DELETE_FILES = "added_delete_files"; + String REMOVED_DELETE_FILES = "removed_delete_files"; + String TOTAL_DELETE_FILES = "total_delete_files"; + String ADDED_EQUALITY_DELETE_FILES = 
"added_equality_delete_files"; + String REMOVED_EQUALITY_DELETE_FILES = "removed_equality_delete_files"; + String ADDED_POSITIONAL_DELETE_FILES = "added_positional_delete_files"; + String REMOVED_POSITIONAL_DELETE_FILES = "removed_positional_delete_files"; + String ADDED_RECORDS = "added_records"; + String REMOVED_RECORDS = "removed_records"; + String TOTAL_RECORDS = "total_records"; + String ADDED_FILE_SIZE_BYTES = "added_file_size_bytes"; + String REMOVED_FILE_SIZE_BYTES = "removed_file_size_bytes"; + String TOTAL_FILE_SIZE_BYTES = "total_file_size_bytes"; + String TOTAL_DURATION_MS = "total_duration_ms"; + String ATTEMPTS = "attempts"; + String METADATA = "metadata"; + + List ALL_COLUMNS = + List.of( + REPORT_ID, + REALM_ID, + CATALOG_ID, + CATALOG_NAME, + NAMESPACE, + TABLE_NAME_COL, + TIMESTAMP_MS, + PRINCIPAL_NAME, + REQUEST_ID, + OTEL_TRACE_ID, + OTEL_SPAN_ID, + REPORT_TRACE_ID, + SNAPSHOT_ID, + SEQUENCE_NUMBER, + OPERATION, + ADDED_DATA_FILES, + REMOVED_DATA_FILES, + TOTAL_DATA_FILES, + ADDED_DELETE_FILES, + REMOVED_DELETE_FILES, + TOTAL_DELETE_FILES, + ADDED_EQUALITY_DELETE_FILES, + REMOVED_EQUALITY_DELETE_FILES, + ADDED_POSITIONAL_DELETE_FILES, + REMOVED_POSITIONAL_DELETE_FILES, + ADDED_RECORDS, + REMOVED_RECORDS, + TOTAL_RECORDS, + ADDED_FILE_SIZE_BYTES, + REMOVED_FILE_SIZE_BYTES, + TOTAL_FILE_SIZE_BYTES, + TOTAL_DURATION_MS, + ATTEMPTS, + METADATA); + + // Getters + String getReportId(); + + String getRealmId(); + + String getCatalogId(); + + String getCatalogName(); + + String getNamespace(); + + String getTableName(); + + long getTimestampMs(); + + @Nullable + String getPrincipalName(); + + @Nullable + String getRequestId(); + + @Nullable + String getOtelTraceId(); + + @Nullable + String getOtelSpanId(); + + @Nullable + String getReportTraceId(); + + long getSnapshotId(); + + @Nullable + Long getSequenceNumber(); + + String getOperation(); + + long getAddedDataFiles(); + + long getRemovedDataFiles(); + + long getTotalDataFiles(); + + long 
getAddedDeleteFiles(); + + long getRemovedDeleteFiles(); + + long getTotalDeleteFiles(); + + long getAddedEqualityDeleteFiles(); + + long getRemovedEqualityDeleteFiles(); + + long getAddedPositionalDeleteFiles(); + + long getRemovedPositionalDeleteFiles(); + + long getAddedRecords(); + + long getRemovedRecords(); + + long getTotalRecords(); + + long getAddedFileSizeBytes(); + + long getRemovedFileSizeBytes(); + + long getTotalFileSizeBytes(); + + long getTotalDurationMs(); + + int getAttempts(); + + @Nullable + String getMetadata(); + + @Override + default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { + return ImmutableModelCommitMetricsReport.builder() + .reportId(rs.getString(REPORT_ID)) + .realmId(rs.getString(REALM_ID)) + .catalogId(rs.getString(CATALOG_ID)) + .catalogName(rs.getString(CATALOG_NAME)) + .namespace(rs.getString(NAMESPACE)) + .tableName(rs.getString(TABLE_NAME_COL)) + .timestampMs(rs.getLong(TIMESTAMP_MS)) + .principalName(rs.getString(PRINCIPAL_NAME)) + .requestId(rs.getString(REQUEST_ID)) + .otelTraceId(rs.getString(OTEL_TRACE_ID)) + .otelSpanId(rs.getString(OTEL_SPAN_ID)) + .reportTraceId(rs.getString(REPORT_TRACE_ID)) + .snapshotId(rs.getLong(SNAPSHOT_ID)) + .sequenceNumber(rs.getObject(SEQUENCE_NUMBER, Long.class)) + .operation(rs.getString(OPERATION)) + .addedDataFiles(rs.getLong(ADDED_DATA_FILES)) + .removedDataFiles(rs.getLong(REMOVED_DATA_FILES)) + .totalDataFiles(rs.getLong(TOTAL_DATA_FILES)) + .addedDeleteFiles(rs.getLong(ADDED_DELETE_FILES)) + .removedDeleteFiles(rs.getLong(REMOVED_DELETE_FILES)) + .totalDeleteFiles(rs.getLong(TOTAL_DELETE_FILES)) + .addedEqualityDeleteFiles(rs.getLong(ADDED_EQUALITY_DELETE_FILES)) + .removedEqualityDeleteFiles(rs.getLong(REMOVED_EQUALITY_DELETE_FILES)) + .addedPositionalDeleteFiles(rs.getLong(ADDED_POSITIONAL_DELETE_FILES)) + .removedPositionalDeleteFiles(rs.getLong(REMOVED_POSITIONAL_DELETE_FILES)) + .addedRecords(rs.getLong(ADDED_RECORDS)) + 
.removedRecords(rs.getLong(REMOVED_RECORDS)) + .totalRecords(rs.getLong(TOTAL_RECORDS)) + .addedFileSizeBytes(rs.getLong(ADDED_FILE_SIZE_BYTES)) + .removedFileSizeBytes(rs.getLong(REMOVED_FILE_SIZE_BYTES)) + .totalFileSizeBytes(rs.getLong(TOTAL_FILE_SIZE_BYTES)) + .totalDurationMs(rs.getLong(TOTAL_DURATION_MS)) + .attempts(rs.getInt(ATTEMPTS)) + .metadata(rs.getString(METADATA)) + .build(); + } + + @Override + default Map toMap(DatabaseType databaseType) { + Map map = new LinkedHashMap<>(); + map.put(REPORT_ID, getReportId()); + map.put(REALM_ID, getRealmId()); + map.put(CATALOG_ID, getCatalogId()); + map.put(CATALOG_NAME, getCatalogName()); + map.put(NAMESPACE, getNamespace()); + map.put(TABLE_NAME_COL, getTableName()); + map.put(TIMESTAMP_MS, getTimestampMs()); + map.put(PRINCIPAL_NAME, getPrincipalName()); + map.put(REQUEST_ID, getRequestId()); + map.put(OTEL_TRACE_ID, getOtelTraceId()); + map.put(OTEL_SPAN_ID, getOtelSpanId()); + map.put(REPORT_TRACE_ID, getReportTraceId()); + map.put(SNAPSHOT_ID, getSnapshotId()); + map.put(SEQUENCE_NUMBER, getSequenceNumber()); + map.put(OPERATION, getOperation()); + map.put(ADDED_DATA_FILES, getAddedDataFiles()); + map.put(REMOVED_DATA_FILES, getRemovedDataFiles()); + map.put(TOTAL_DATA_FILES, getTotalDataFiles()); + map.put(ADDED_DELETE_FILES, getAddedDeleteFiles()); + map.put(REMOVED_DELETE_FILES, getRemovedDeleteFiles()); + map.put(TOTAL_DELETE_FILES, getTotalDeleteFiles()); + map.put(ADDED_EQUALITY_DELETE_FILES, getAddedEqualityDeleteFiles()); + map.put(REMOVED_EQUALITY_DELETE_FILES, getRemovedEqualityDeleteFiles()); + map.put(ADDED_POSITIONAL_DELETE_FILES, getAddedPositionalDeleteFiles()); + map.put(REMOVED_POSITIONAL_DELETE_FILES, getRemovedPositionalDeleteFiles()); + map.put(ADDED_RECORDS, getAddedRecords()); + map.put(REMOVED_RECORDS, getRemovedRecords()); + map.put(TOTAL_RECORDS, getTotalRecords()); + map.put(ADDED_FILE_SIZE_BYTES, getAddedFileSizeBytes()); + map.put(REMOVED_FILE_SIZE_BYTES, 
getRemovedFileSizeBytes()); + map.put(TOTAL_FILE_SIZE_BYTES, getTotalFileSizeBytes()); + map.put(TOTAL_DURATION_MS, getTotalDurationMs()); + map.put(ATTEMPTS, getAttempts()); + if (databaseType.equals(DatabaseType.POSTGRES)) { + map.put(METADATA, toJsonbPGobject(getMetadata() != null ? getMetadata() : "{}")); + } else { + map.put(METADATA, getMetadata() != null ? getMetadata() : "{}"); + } + return map; + } + + /** Dummy instance to be used as a Converter when calling fromResultSet(). */ + ModelCommitMetricsReport CONVERTER = + ImmutableModelCommitMetricsReport.builder() + .reportId("") + .realmId("") + .catalogId("") + .catalogName("") + .namespace("") + .tableName("") + .timestampMs(0L) + .snapshotId(0L) + .operation("") + .addedDataFiles(0L) + .removedDataFiles(0L) + .totalDataFiles(0L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(0L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(0L) + .removedRecords(0L) + .totalRecords(0L) + .addedFileSizeBytes(0L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(0L) + .totalDurationMs(0L) + .attempts(1) + .build(); +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java new file mode 100644 index 0000000000..16417e4afe --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Map; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; + +/** + * Converter for reading ModelCommitMetricsReport from database result sets. This class is needed + * because the Immutables-generated class cannot be instantiated without required fields. 
+ */ +public class ModelCommitMetricsReportConverter implements Converter { + + @Override + public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { + return ImmutableModelCommitMetricsReport.builder() + .reportId(rs.getString(ModelCommitMetricsReport.REPORT_ID)) + .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) + .catalogId(rs.getString(ModelCommitMetricsReport.CATALOG_ID)) + .catalogName(rs.getString(ModelCommitMetricsReport.CATALOG_NAME)) + .namespace(rs.getString(ModelCommitMetricsReport.NAMESPACE)) + .tableName(rs.getString(ModelCommitMetricsReport.TABLE_NAME_COL)) + .timestampMs(rs.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) + .principalName(rs.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) + .requestId(rs.getString(ModelCommitMetricsReport.REQUEST_ID)) + .otelTraceId(rs.getString(ModelCommitMetricsReport.OTEL_TRACE_ID)) + .otelSpanId(rs.getString(ModelCommitMetricsReport.OTEL_SPAN_ID)) + .reportTraceId(rs.getString(ModelCommitMetricsReport.REPORT_TRACE_ID)) + .snapshotId(rs.getObject(ModelCommitMetricsReport.SNAPSHOT_ID, Long.class)) + .sequenceNumber(rs.getObject(ModelCommitMetricsReport.SEQUENCE_NUMBER, Long.class)) + .operation(rs.getString(ModelCommitMetricsReport.OPERATION)) + .addedDataFiles(rs.getLong(ModelCommitMetricsReport.ADDED_DATA_FILES)) + .removedDataFiles(rs.getLong(ModelCommitMetricsReport.REMOVED_DATA_FILES)) + .totalDataFiles(rs.getLong(ModelCommitMetricsReport.TOTAL_DATA_FILES)) + .addedDeleteFiles(rs.getLong(ModelCommitMetricsReport.ADDED_DELETE_FILES)) + .removedDeleteFiles(rs.getLong(ModelCommitMetricsReport.REMOVED_DELETE_FILES)) + .totalDeleteFiles(rs.getLong(ModelCommitMetricsReport.TOTAL_DELETE_FILES)) + .addedEqualityDeleteFiles(rs.getLong(ModelCommitMetricsReport.ADDED_EQUALITY_DELETE_FILES)) + .removedEqualityDeleteFiles( + rs.getLong(ModelCommitMetricsReport.REMOVED_EQUALITY_DELETE_FILES)) + .addedPositionalDeleteFiles( + 
rs.getLong(ModelCommitMetricsReport.ADDED_POSITIONAL_DELETE_FILES)) + .removedPositionalDeleteFiles( + rs.getLong(ModelCommitMetricsReport.REMOVED_POSITIONAL_DELETE_FILES)) + .addedRecords(rs.getLong(ModelCommitMetricsReport.ADDED_RECORDS)) + .removedRecords(rs.getLong(ModelCommitMetricsReport.REMOVED_RECORDS)) + .totalRecords(rs.getLong(ModelCommitMetricsReport.TOTAL_RECORDS)) + .addedFileSizeBytes(rs.getLong(ModelCommitMetricsReport.ADDED_FILE_SIZE_BYTES)) + .removedFileSizeBytes(rs.getLong(ModelCommitMetricsReport.REMOVED_FILE_SIZE_BYTES)) + .totalFileSizeBytes(rs.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .totalDurationMs(rs.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)) + .attempts(rs.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)) + .metadata(rs.getString(ModelCommitMetricsReport.METADATA)) + .build(); + } + + @Override + public Map toMap(DatabaseType databaseType) { + throw new UnsupportedOperationException("Converter is read-only"); + } +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java new file mode 100644 index 0000000000..8a44d44390 --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import jakarta.annotation.Nullable; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; + +/** Model class for scan_metrics_report table - stores scan metrics as first-class entities. */ +@PolarisImmutable +public interface ModelScanMetricsReport extends Converter { + String TABLE_NAME = "SCAN_METRICS_REPORT"; + + // Column names + String REPORT_ID = "report_id"; + String REALM_ID = "realm_id"; + String CATALOG_ID = "catalog_id"; + String CATALOG_NAME = "catalog_name"; + String NAMESPACE = "namespace"; + String TABLE_NAME_COL = "table_name"; + String TIMESTAMP_MS = "timestamp_ms"; + String PRINCIPAL_NAME = "principal_name"; + String REQUEST_ID = "request_id"; + String OTEL_TRACE_ID = "otel_trace_id"; + String OTEL_SPAN_ID = "otel_span_id"; + String REPORT_TRACE_ID = "report_trace_id"; + String SNAPSHOT_ID = "snapshot_id"; + String SCHEMA_ID = "schema_id"; + String FILTER_EXPRESSION = "filter_expression"; + String PROJECTED_FIELD_IDS = "projected_field_ids"; + String PROJECTED_FIELD_NAMES = "projected_field_names"; + String RESULT_DATA_FILES = "result_data_files"; + String RESULT_DELETE_FILES = "result_delete_files"; + String TOTAL_FILE_SIZE_BYTES = "total_file_size_bytes"; + String TOTAL_DATA_MANIFESTS = "total_data_manifests"; + String TOTAL_DELETE_MANIFESTS = 
"total_delete_manifests"; + String SCANNED_DATA_MANIFESTS = "scanned_data_manifests"; + String SCANNED_DELETE_MANIFESTS = "scanned_delete_manifests"; + String SKIPPED_DATA_MANIFESTS = "skipped_data_manifests"; + String SKIPPED_DELETE_MANIFESTS = "skipped_delete_manifests"; + String SKIPPED_DATA_FILES = "skipped_data_files"; + String SKIPPED_DELETE_FILES = "skipped_delete_files"; + String TOTAL_PLANNING_DURATION_MS = "total_planning_duration_ms"; + String EQUALITY_DELETE_FILES = "equality_delete_files"; + String POSITIONAL_DELETE_FILES = "positional_delete_files"; + String INDEXED_DELETE_FILES = "indexed_delete_files"; + String TOTAL_DELETE_FILE_SIZE_BYTES = "total_delete_file_size_bytes"; + String METADATA = "metadata"; + + List ALL_COLUMNS = + List.of( + REPORT_ID, + REALM_ID, + CATALOG_ID, + CATALOG_NAME, + NAMESPACE, + TABLE_NAME_COL, + TIMESTAMP_MS, + PRINCIPAL_NAME, + REQUEST_ID, + OTEL_TRACE_ID, + OTEL_SPAN_ID, + REPORT_TRACE_ID, + SNAPSHOT_ID, + SCHEMA_ID, + FILTER_EXPRESSION, + PROJECTED_FIELD_IDS, + PROJECTED_FIELD_NAMES, + RESULT_DATA_FILES, + RESULT_DELETE_FILES, + TOTAL_FILE_SIZE_BYTES, + TOTAL_DATA_MANIFESTS, + TOTAL_DELETE_MANIFESTS, + SCANNED_DATA_MANIFESTS, + SCANNED_DELETE_MANIFESTS, + SKIPPED_DATA_MANIFESTS, + SKIPPED_DELETE_MANIFESTS, + SKIPPED_DATA_FILES, + SKIPPED_DELETE_FILES, + TOTAL_PLANNING_DURATION_MS, + EQUALITY_DELETE_FILES, + POSITIONAL_DELETE_FILES, + INDEXED_DELETE_FILES, + TOTAL_DELETE_FILE_SIZE_BYTES, + METADATA); + + // Getters + String getReportId(); + + String getRealmId(); + + String getCatalogId(); + + String getCatalogName(); + + String getNamespace(); + + String getTableName(); + + long getTimestampMs(); + + @Nullable + String getPrincipalName(); + + @Nullable + String getRequestId(); + + @Nullable + String getOtelTraceId(); + + @Nullable + String getOtelSpanId(); + + @Nullable + String getReportTraceId(); + + @Nullable + Long getSnapshotId(); + + @Nullable + Integer getSchemaId(); + + @Nullable + String 
getFilterExpression(); + + @Nullable + String getProjectedFieldIds(); + + @Nullable + String getProjectedFieldNames(); + + long getResultDataFiles(); + + long getResultDeleteFiles(); + + long getTotalFileSizeBytes(); + + long getTotalDataManifests(); + + long getTotalDeleteManifests(); + + long getScannedDataManifests(); + + long getScannedDeleteManifests(); + + long getSkippedDataManifests(); + + long getSkippedDeleteManifests(); + + long getSkippedDataFiles(); + + long getSkippedDeleteFiles(); + + long getTotalPlanningDurationMs(); + + long getEqualityDeleteFiles(); + + long getPositionalDeleteFiles(); + + long getIndexedDeleteFiles(); + + long getTotalDeleteFileSizeBytes(); + + @Nullable + String getMetadata(); + + @Override + default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { + return ImmutableModelScanMetricsReport.builder() + .reportId(rs.getString(REPORT_ID)) + .realmId(rs.getString(REALM_ID)) + .catalogId(rs.getString(CATALOG_ID)) + .catalogName(rs.getString(CATALOG_NAME)) + .namespace(rs.getString(NAMESPACE)) + .tableName(rs.getString(TABLE_NAME_COL)) + .timestampMs(rs.getLong(TIMESTAMP_MS)) + .principalName(rs.getString(PRINCIPAL_NAME)) + .requestId(rs.getString(REQUEST_ID)) + .otelTraceId(rs.getString(OTEL_TRACE_ID)) + .otelSpanId(rs.getString(OTEL_SPAN_ID)) + .reportTraceId(rs.getString(REPORT_TRACE_ID)) + .snapshotId(rs.getObject(SNAPSHOT_ID, Long.class)) + .schemaId(rs.getObject(SCHEMA_ID, Integer.class)) + .filterExpression(rs.getString(FILTER_EXPRESSION)) + .projectedFieldIds(rs.getString(PROJECTED_FIELD_IDS)) + .projectedFieldNames(rs.getString(PROJECTED_FIELD_NAMES)) + .resultDataFiles(rs.getLong(RESULT_DATA_FILES)) + .resultDeleteFiles(rs.getLong(RESULT_DELETE_FILES)) + .totalFileSizeBytes(rs.getLong(TOTAL_FILE_SIZE_BYTES)) + .totalDataManifests(rs.getLong(TOTAL_DATA_MANIFESTS)) + .totalDeleteManifests(rs.getLong(TOTAL_DELETE_MANIFESTS)) + .scannedDataManifests(rs.getLong(SCANNED_DATA_MANIFESTS)) + 
.scannedDeleteManifests(rs.getLong(SCANNED_DELETE_MANIFESTS)) + .skippedDataManifests(rs.getLong(SKIPPED_DATA_MANIFESTS)) + .skippedDeleteManifests(rs.getLong(SKIPPED_DELETE_MANIFESTS)) + .skippedDataFiles(rs.getLong(SKIPPED_DATA_FILES)) + .skippedDeleteFiles(rs.getLong(SKIPPED_DELETE_FILES)) + .totalPlanningDurationMs(rs.getLong(TOTAL_PLANNING_DURATION_MS)) + .equalityDeleteFiles(rs.getLong(EQUALITY_DELETE_FILES)) + .positionalDeleteFiles(rs.getLong(POSITIONAL_DELETE_FILES)) + .indexedDeleteFiles(rs.getLong(INDEXED_DELETE_FILES)) + .totalDeleteFileSizeBytes(rs.getLong(TOTAL_DELETE_FILE_SIZE_BYTES)) + .metadata(rs.getString(METADATA)) + .build(); + } + + @Override + default Map toMap(DatabaseType databaseType) { + Map map = new LinkedHashMap<>(); + map.put(REPORT_ID, getReportId()); + map.put(REALM_ID, getRealmId()); + map.put(CATALOG_ID, getCatalogId()); + map.put(CATALOG_NAME, getCatalogName()); + map.put(NAMESPACE, getNamespace()); + map.put(TABLE_NAME_COL, getTableName()); + map.put(TIMESTAMP_MS, getTimestampMs()); + map.put(PRINCIPAL_NAME, getPrincipalName()); + map.put(REQUEST_ID, getRequestId()); + map.put(OTEL_TRACE_ID, getOtelTraceId()); + map.put(OTEL_SPAN_ID, getOtelSpanId()); + map.put(REPORT_TRACE_ID, getReportTraceId()); + map.put(SNAPSHOT_ID, getSnapshotId()); + map.put(SCHEMA_ID, getSchemaId()); + map.put(FILTER_EXPRESSION, getFilterExpression()); + map.put(PROJECTED_FIELD_IDS, getProjectedFieldIds()); + map.put(PROJECTED_FIELD_NAMES, getProjectedFieldNames()); + map.put(RESULT_DATA_FILES, getResultDataFiles()); + map.put(RESULT_DELETE_FILES, getResultDeleteFiles()); + map.put(TOTAL_FILE_SIZE_BYTES, getTotalFileSizeBytes()); + map.put(TOTAL_DATA_MANIFESTS, getTotalDataManifests()); + map.put(TOTAL_DELETE_MANIFESTS, getTotalDeleteManifests()); + map.put(SCANNED_DATA_MANIFESTS, getScannedDataManifests()); + map.put(SCANNED_DELETE_MANIFESTS, getScannedDeleteManifests()); + map.put(SKIPPED_DATA_MANIFESTS, getSkippedDataManifests()); + 
map.put(SKIPPED_DELETE_MANIFESTS, getSkippedDeleteManifests()); + map.put(SKIPPED_DATA_FILES, getSkippedDataFiles()); + map.put(SKIPPED_DELETE_FILES, getSkippedDeleteFiles()); + map.put(TOTAL_PLANNING_DURATION_MS, getTotalPlanningDurationMs()); + map.put(EQUALITY_DELETE_FILES, getEqualityDeleteFiles()); + map.put(POSITIONAL_DELETE_FILES, getPositionalDeleteFiles()); + map.put(INDEXED_DELETE_FILES, getIndexedDeleteFiles()); + map.put(TOTAL_DELETE_FILE_SIZE_BYTES, getTotalDeleteFileSizeBytes()); + if (databaseType.equals(DatabaseType.POSTGRES)) { + map.put(METADATA, toJsonbPGobject(getMetadata() != null ? getMetadata() : "{}")); + } else { + map.put(METADATA, getMetadata() != null ? getMetadata() : "{}"); + } + return map; + } + + /** Dummy instance to be used as a Converter when calling fromResultSet(). */ + ModelScanMetricsReport CONVERTER = + ImmutableModelScanMetricsReport.builder() + .reportId("") + .realmId("") + .catalogId("") + .catalogName("") + .namespace("") + .tableName("") + .timestampMs(0L) + .resultDataFiles(0L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(0L) + .totalDataManifests(0L) + .totalDeleteManifests(0L) + .scannedDataManifests(0L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(0L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java new file mode 100644 index 0000000000..e754b3c85d --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -0,0 +1,76 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Map; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; + +/** + * Converter for reading ModelScanMetricsReport from database result sets. This class is needed + * because the Immutables-generated class cannot be instantiated without required fields. 
+ */ +public class ModelScanMetricsReportConverter implements Converter<ModelScanMetricsReport> { + + @Override + public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { + return ImmutableModelScanMetricsReport.builder() + .reportId(rs.getString(ModelScanMetricsReport.REPORT_ID)) + .realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) + .catalogId(rs.getString(ModelScanMetricsReport.CATALOG_ID)) + .catalogName(rs.getString(ModelScanMetricsReport.CATALOG_NAME)) + .namespace(rs.getString(ModelScanMetricsReport.NAMESPACE)) + .tableName(rs.getString(ModelScanMetricsReport.TABLE_NAME_COL)) + .timestampMs(rs.getLong(ModelScanMetricsReport.TIMESTAMP_MS)) + .principalName(rs.getString(ModelScanMetricsReport.PRINCIPAL_NAME)) + .requestId(rs.getString(ModelScanMetricsReport.REQUEST_ID)) + .otelTraceId(rs.getString(ModelScanMetricsReport.OTEL_TRACE_ID)) + .otelSpanId(rs.getString(ModelScanMetricsReport.OTEL_SPAN_ID)) + .reportTraceId(rs.getString(ModelScanMetricsReport.REPORT_TRACE_ID)) + .snapshotId(rs.getObject(ModelScanMetricsReport.SNAPSHOT_ID, Long.class)) + .schemaId(rs.getObject(ModelScanMetricsReport.SCHEMA_ID, Integer.class)) + .filterExpression(rs.getString(ModelScanMetricsReport.FILTER_EXPRESSION)) + .projectedFieldIds(rs.getString(ModelScanMetricsReport.PROJECTED_FIELD_IDS)) + .projectedFieldNames(rs.getString(ModelScanMetricsReport.PROJECTED_FIELD_NAMES)) + .resultDataFiles(rs.getLong(ModelScanMetricsReport.RESULT_DATA_FILES)) + .resultDeleteFiles(rs.getLong(ModelScanMetricsReport.RESULT_DELETE_FILES)) + .totalFileSizeBytes(rs.getLong(ModelScanMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .totalDataManifests(rs.getLong(ModelScanMetricsReport.TOTAL_DATA_MANIFESTS)) + .totalDeleteManifests(rs.getLong(ModelScanMetricsReport.TOTAL_DELETE_MANIFESTS)) + .scannedDataManifests(rs.getLong(ModelScanMetricsReport.SCANNED_DATA_MANIFESTS)) + .scannedDeleteManifests(rs.getLong(ModelScanMetricsReport.SCANNED_DELETE_MANIFESTS)) +
.skippedDataManifests(rs.getLong(ModelScanMetricsReport.SKIPPED_DATA_MANIFESTS)) + .skippedDeleteManifests(rs.getLong(ModelScanMetricsReport.SKIPPED_DELETE_MANIFESTS)) + .skippedDataFiles(rs.getLong(ModelScanMetricsReport.SKIPPED_DATA_FILES)) + .skippedDeleteFiles(rs.getLong(ModelScanMetricsReport.SKIPPED_DELETE_FILES)) + .totalPlanningDurationMs(rs.getLong(ModelScanMetricsReport.TOTAL_PLANNING_DURATION_MS)) + .equalityDeleteFiles(rs.getLong(ModelScanMetricsReport.EQUALITY_DELETE_FILES)) + .positionalDeleteFiles(rs.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)) + .indexedDeleteFiles(rs.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)) + .totalDeleteFileSizeBytes(rs.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) + .metadata(rs.getString(ModelScanMetricsReport.METADATA)) + .build(); + } + + @Override + public Map<String, Object> toMap(DatabaseType databaseType) { + throw new UnsupportedOperationException("Converter is read-only"); + } +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index fd2bc29b50..4fab1c5072 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -17,14 +17,14 @@ -- under the License.
-- +-- Changes from v2: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency -- Changes from v3: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles --- ============================================================================ +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET SCHEMA POLARIS_SCHEMA; @@ -127,10 +127,6 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); --- ============================================================================ --- EVENTS TABLE (NEW in v4) --- ============================================================================ - CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -168,36 +164,6 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( PRIMARY KEY (realm_id, idempotency_key) ); -CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires - ON idempotency_records (realm_id, expires_at); - --- ============================================================================ --- IDEMPOTENCY RECORDS TABLE (NEW in v4) --- ============================================================================ - -CREATE TABLE IF NOT EXISTS idempotency_records ( 
- realm_id TEXT NOT NULL, - idempotency_key TEXT NOT NULL, - operation_type TEXT NOT NULL, - resource_id TEXT NOT NULL, -- normalized request-derived resource identifier (not a generated entity id) - - -- Finalization/replay - http_status INTEGER, -- NULL while IN_PROGRESS; set only on finalized 2xx/terminal 4xx - error_subtype TEXT, -- optional: e.g., already_exists, namespace_not_empty, idempotency_replay_failed - response_summary TEXT, -- minimal body to reproduce equivalent response (JSON string) - response_headers TEXT, -- small whitelisted headers to replay (JSON string) - finalized_at TIMESTAMP, -- when http_status was written - - -- Liveness/ops - created_at TIMESTAMP NOT NULL, - updated_at TIMESTAMP NOT NULL, - heartbeat_at TIMESTAMP, -- updated by owner while IN_PROGRESS - executor_id TEXT, -- owner pod/worker id - expires_at TIMESTAMP, - - PRIMARY KEY (realm_id, idempotency_key) -); - CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires ON idempotency_records (realm_id, expires_at); @@ -209,9 +175,10 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, + table_name TEXT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -279,9 +246,10 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, + table_name TEXT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 530fc969d9..b85496b5fc 
100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -16,20 +16,14 @@ -- specific language governing permissions and limitations -- under the License. --- ============================================================================ --- POLARIS JDBC SCHEMA VERSION 4 (PostgreSQL) --- ============================================================================ --- This schema is SELF-CONTAINED and can be used for fresh installs. --- Each schema version includes ALL tables, not just incremental changes. --- +-- Changes from v2: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency -- Changes from v3: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles --- ============================================================================ +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET search_path TO POLARIS_SCHEMA; @@ -133,10 +127,6 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); --- ============================================================================ --- EVENTS TABLE (NEW in v4) --- 
============================================================================ - CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -151,10 +141,7 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); --- ============================================================================ --- IDEMPOTENCY RECORDS TABLE (NEW in v4) --- ============================================================================ - +-- Idempotency records (key-only idempotency; durable replay) CREATE TABLE IF NOT EXISTS idempotency_records ( realm_id TEXT NOT NULL, idempotency_key TEXT NOT NULL, @@ -190,9 +177,10 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, + table_name TEXT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -266,9 +254,10 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, + catalog_id TEXT NOT NULL, + catalog_name TEXT NOT NULL, namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, + table_name TEXT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java new file mode 100644 index 0000000000..8bb295c4d1 --- /dev/null +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Map; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; +import org.junit.jupiter.api.Test; +import org.postgresql.util.PGobject; + +public class ModelCommitMetricsReportTest { + + private static final String TEST_REPORT_ID = "commit-report-123"; + private static final String TEST_REALM_ID = "realm-1"; + private static final String TEST_CATALOG_ID = "catalog-1"; + private static final String TEST_CATALOG_NAME = "my_catalog"; + private static final String TEST_NAMESPACE = "db.schema"; + private static final String TEST_TABLE_NAME = "my_table"; + private static final long TEST_TIMESTAMP_MS = 1704067200000L; + private static final String TEST_PRINCIPAL = "user@example.com"; + private static final String TEST_REQUEST_ID = "req-456"; + private static final String TEST_OTEL_TRACE_ID = "trace-789"; + private static final String TEST_OTEL_SPAN_ID = "span-012"; + private static final String TEST_REPORT_TRACE_ID = "report-trace-345"; + 
private static final long TEST_SNAPSHOT_ID = 987654321L; + private static final Long TEST_SEQUENCE_NUMBER = 5L; + private static final String TEST_OPERATION = "append"; + private static final long TEST_ADDED_DATA_FILES = 10L; + private static final long TEST_REMOVED_DATA_FILES = 2L; + private static final long TEST_TOTAL_DATA_FILES = 50L; + private static final long TEST_ADDED_DELETE_FILES = 1L; + private static final long TEST_REMOVED_DELETE_FILES = 0L; + private static final long TEST_TOTAL_DELETE_FILES = 3L; + private static final long TEST_ADDED_EQUALITY_DELETE_FILES = 1L; + private static final long TEST_REMOVED_EQUALITY_DELETE_FILES = 0L; + private static final long TEST_ADDED_POSITIONAL_DELETE_FILES = 0L; + private static final long TEST_REMOVED_POSITIONAL_DELETE_FILES = 0L; + private static final long TEST_ADDED_RECORDS = 1000L; + private static final long TEST_REMOVED_RECORDS = 50L; + private static final long TEST_TOTAL_RECORDS = 10000L; + private static final long TEST_ADDED_FILE_SIZE = 1024000L; + private static final long TEST_REMOVED_FILE_SIZE = 51200L; + private static final long TEST_TOTAL_FILE_SIZE = 10240000L; + private static final long TEST_TOTAL_DURATION = 250L; + private static final int TEST_ATTEMPTS = 1; + private static final String TEST_METADATA = "{\"commit\":\"info\"}"; + + @Test + public void testFromResultSet() throws SQLException { + ResultSet mockResultSet = mock(ResultSet.class); + when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.CATALOG_NAME)) + .thenReturn(TEST_CATALOG_NAME); + when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); + when(mockResultSet.getString(ModelCommitMetricsReport.TABLE_NAME_COL)) + 
.thenReturn(TEST_TABLE_NAME); + when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) + .thenReturn(TEST_TIMESTAMP_MS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) + .thenReturn(TEST_PRINCIPAL); + when(mockResultSet.getString(ModelCommitMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_TRACE_ID)) + .thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_SPAN_ID)) + .thenReturn(TEST_OTEL_SPAN_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_TRACE_ID)) + .thenReturn(TEST_REPORT_TRACE_ID); + when(mockResultSet.getLong(ModelCommitMetricsReport.SNAPSHOT_ID)).thenReturn(TEST_SNAPSHOT_ID); + when(mockResultSet.getObject(ModelCommitMetricsReport.SEQUENCE_NUMBER, Long.class)) + .thenReturn(TEST_SEQUENCE_NUMBER); + when(mockResultSet.getString(ModelCommitMetricsReport.OPERATION)).thenReturn(TEST_OPERATION); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DATA_FILES)) + .thenReturn(TEST_ADDED_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DATA_FILES)) + .thenReturn(TEST_REMOVED_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DATA_FILES)) + .thenReturn(TEST_TOTAL_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DELETE_FILES)) + .thenReturn(TEST_ADDED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DELETE_FILES)) + .thenReturn(TEST_REMOVED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DELETE_FILES)) + .thenReturn(TEST_TOTAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_EQUALITY_DELETE_FILES)) + .thenReturn(TEST_ADDED_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_EQUALITY_DELETE_FILES)) + .thenReturn(TEST_REMOVED_EQUALITY_DELETE_FILES); + 
when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_ADDED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_REMOVED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_RECORDS)) + .thenReturn(TEST_ADDED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_RECORDS)) + .thenReturn(TEST_REMOVED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_RECORDS)) + .thenReturn(TEST_TOTAL_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_FILE_SIZE_BYTES)) + .thenReturn(TEST_ADDED_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_FILE_SIZE_BYTES)) + .thenReturn(TEST_REMOVED_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DURATION_MS)) + .thenReturn(TEST_TOTAL_DURATION); + when(mockResultSet.getInt(ModelCommitMetricsReport.ATTEMPTS)).thenReturn(TEST_ATTEMPTS); + when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); + + ModelCommitMetricsReport result = + ModelCommitMetricsReport.CONVERTER.fromResultSet(mockResultSet); + + assertEquals(TEST_REPORT_ID, result.getReportId()); + assertEquals(TEST_REALM_ID, result.getRealmId()); + assertEquals(TEST_CATALOG_ID, result.getCatalogId()); + assertEquals(TEST_CATALOG_NAME, result.getCatalogName()); + assertEquals(TEST_NAMESPACE, result.getNamespace()); + assertEquals(TEST_TABLE_NAME, result.getTableName()); + assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); + assertEquals(TEST_SNAPSHOT_ID, result.getSnapshotId()); + assertEquals(TEST_OPERATION, result.getOperation()); + assertEquals(TEST_ADDED_DATA_FILES, result.getAddedDataFiles()); + assertEquals(TEST_ADDED_RECORDS, result.getAddedRecords()); + 
assertEquals(TEST_TOTAL_DURATION, result.getTotalDurationMs()); + assertEquals(TEST_ATTEMPTS, result.getAttempts()); + assertEquals(TEST_METADATA, result.getMetadata()); + } + + @Test + public void testToMapWithH2DatabaseType() { + ModelCommitMetricsReport report = createTestReport(); + + Map resultMap = report.toMap(DatabaseType.H2); + + assertEquals(TEST_REPORT_ID, resultMap.get(ModelCommitMetricsReport.REPORT_ID)); + assertEquals(TEST_REALM_ID, resultMap.get(ModelCommitMetricsReport.REALM_ID)); + assertEquals(TEST_SNAPSHOT_ID, resultMap.get(ModelCommitMetricsReport.SNAPSHOT_ID)); + assertEquals(TEST_OPERATION, resultMap.get(ModelCommitMetricsReport.OPERATION)); + assertEquals(TEST_ADDED_DATA_FILES, resultMap.get(ModelCommitMetricsReport.ADDED_DATA_FILES)); + assertEquals(TEST_METADATA, resultMap.get(ModelCommitMetricsReport.METADATA)); + } + + @Test + public void testToMapWithPostgresType() { + ModelCommitMetricsReport report = createTestReport(); + + Map resultMap = report.toMap(DatabaseType.POSTGRES); + + assertEquals(TEST_REPORT_ID, resultMap.get(ModelCommitMetricsReport.REPORT_ID)); + PGobject pgObject = (PGobject) resultMap.get(ModelCommitMetricsReport.METADATA); + assertEquals("jsonb", pgObject.getType()); + assertEquals(TEST_METADATA, pgObject.getValue()); + } + + private ModelCommitMetricsReport createTestReport() { + return ImmutableModelCommitMetricsReport.builder() + .reportId(TEST_REPORT_ID) + .realmId(TEST_REALM_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .namespace(TEST_NAMESPACE) + .tableName(TEST_TABLE_NAME) + .timestampMs(TEST_TIMESTAMP_MS) + .principalName(TEST_PRINCIPAL) + .requestId(TEST_REQUEST_ID) + .otelTraceId(TEST_OTEL_TRACE_ID) + .snapshotId(TEST_SNAPSHOT_ID) + .sequenceNumber(TEST_SEQUENCE_NUMBER) + .operation(TEST_OPERATION) + .addedDataFiles(TEST_ADDED_DATA_FILES) + .removedDataFiles(TEST_REMOVED_DATA_FILES) + .totalDataFiles(TEST_TOTAL_DATA_FILES) + .addedDeleteFiles(TEST_ADDED_DELETE_FILES) + 
.removedDeleteFiles(TEST_REMOVED_DELETE_FILES) + .totalDeleteFiles(TEST_TOTAL_DELETE_FILES) + .addedEqualityDeleteFiles(TEST_ADDED_EQUALITY_DELETE_FILES) + .removedEqualityDeleteFiles(TEST_REMOVED_EQUALITY_DELETE_FILES) + .addedPositionalDeleteFiles(TEST_ADDED_POSITIONAL_DELETE_FILES) + .removedPositionalDeleteFiles(TEST_REMOVED_POSITIONAL_DELETE_FILES) + .addedRecords(TEST_ADDED_RECORDS) + .removedRecords(TEST_REMOVED_RECORDS) + .totalRecords(TEST_TOTAL_RECORDS) + .addedFileSizeBytes(TEST_ADDED_FILE_SIZE) + .removedFileSizeBytes(TEST_REMOVED_FILE_SIZE) + .totalFileSizeBytes(TEST_TOTAL_FILE_SIZE) + .totalDurationMs(TEST_TOTAL_DURATION) + .attempts(TEST_ATTEMPTS) + .metadata(TEST_METADATA) + .build(); + } +} diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java new file mode 100644 index 0000000000..0c8f26a2ed --- /dev/null +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Map; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; +import org.junit.jupiter.api.Test; +import org.postgresql.util.PGobject; + +public class ModelScanMetricsReportTest { + + private static final String TEST_REPORT_ID = "report-123"; + private static final String TEST_REALM_ID = "realm-1"; + private static final String TEST_CATALOG_ID = "catalog-1"; + private static final String TEST_CATALOG_NAME = "my_catalog"; + private static final String TEST_NAMESPACE = "db.schema"; + private static final String TEST_TABLE_NAME = "my_table"; + private static final long TEST_TIMESTAMP_MS = 1704067200000L; + private static final String TEST_PRINCIPAL = "user@example.com"; + private static final String TEST_REQUEST_ID = "req-456"; + private static final String TEST_OTEL_TRACE_ID = "trace-789"; + private static final String TEST_OTEL_SPAN_ID = "span-012"; + private static final String TEST_REPORT_TRACE_ID = "report-trace-345"; + private static final Long TEST_SNAPSHOT_ID = 123456789L; + private static final Integer TEST_SCHEMA_ID = 1; + private static final String TEST_FILTER = "id > 100"; + private static final String TEST_PROJECTED_IDS = "1,2,3"; + private static final String TEST_PROJECTED_NAMES = "id,name,value"; + private static final long TEST_RESULT_DATA_FILES = 10L; + private static final long TEST_RESULT_DELETE_FILES = 2L; + private static final long TEST_TOTAL_FILE_SIZE = 1024000L; + private static final long TEST_TOTAL_DATA_MANIFESTS = 5L; + private static final long TEST_TOTAL_DELETE_MANIFESTS = 1L; + private static final long TEST_SCANNED_DATA_MANIFESTS 
= 3L; + private static final long TEST_SCANNED_DELETE_MANIFESTS = 1L; + private static final long TEST_SKIPPED_DATA_MANIFESTS = 2L; + private static final long TEST_SKIPPED_DELETE_MANIFESTS = 0L; + private static final long TEST_SKIPPED_DATA_FILES = 5L; + private static final long TEST_SKIPPED_DELETE_FILES = 0L; + private static final long TEST_PLANNING_DURATION = 150L; + private static final long TEST_EQUALITY_DELETE_FILES = 1L; + private static final long TEST_POSITIONAL_DELETE_FILES = 1L; + private static final long TEST_INDEXED_DELETE_FILES = 0L; + private static final long TEST_DELETE_FILE_SIZE = 2048L; + private static final String TEST_METADATA = "{\"custom\":\"value\"}"; + + @Test + public void testFromResultSet() throws SQLException { + ResultSet mockResultSet = mock(ResultSet.class); + when(mockResultSet.getString(ModelScanMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); + when(mockResultSet.getString(ModelScanMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); + when(mockResultSet.getString(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); + when(mockResultSet.getString(ModelScanMetricsReport.CATALOG_NAME)) + .thenReturn(TEST_CATALOG_NAME); + when(mockResultSet.getString(ModelScanMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); + when(mockResultSet.getString(ModelScanMetricsReport.TABLE_NAME_COL)) + .thenReturn(TEST_TABLE_NAME); + when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); + when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); + when(mockResultSet.getString(ModelScanMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_TRACE_ID)) + .thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_SPAN_ID)) + .thenReturn(TEST_OTEL_SPAN_ID); + when(mockResultSet.getString(ModelScanMetricsReport.REPORT_TRACE_ID)) + .thenReturn(TEST_REPORT_TRACE_ID); + 
when(mockResultSet.getObject(ModelScanMetricsReport.SNAPSHOT_ID, Long.class)) + .thenReturn(TEST_SNAPSHOT_ID); + when(mockResultSet.getObject(ModelScanMetricsReport.SCHEMA_ID, Integer.class)) + .thenReturn(TEST_SCHEMA_ID); + when(mockResultSet.getString(ModelScanMetricsReport.FILTER_EXPRESSION)).thenReturn(TEST_FILTER); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_IDS)) + .thenReturn(TEST_PROJECTED_IDS); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_NAMES)) + .thenReturn(TEST_PROJECTED_NAMES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DATA_FILES)) + .thenReturn(TEST_RESULT_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DELETE_FILES)) + .thenReturn(TEST_RESULT_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DATA_MANIFESTS)) + .thenReturn(TEST_TOTAL_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_MANIFESTS)) + .thenReturn(TEST_TOTAL_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DATA_MANIFESTS)) + .thenReturn(TEST_SCANNED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DELETE_MANIFESTS)) + .thenReturn(TEST_SCANNED_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_MANIFESTS)) + .thenReturn(TEST_SKIPPED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_MANIFESTS)) + .thenReturn(TEST_SKIPPED_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_FILES)) + .thenReturn(TEST_SKIPPED_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_FILES)) + .thenReturn(TEST_SKIPPED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_PLANNING_DURATION_MS)) + .thenReturn(TEST_PLANNING_DURATION); + 
when(mockResultSet.getLong(ModelScanMetricsReport.EQUALITY_DELETE_FILES)) + .thenReturn(TEST_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)) + .thenReturn(TEST_INDEXED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) + .thenReturn(TEST_DELETE_FILE_SIZE); + when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); + + ModelScanMetricsReport result = ModelScanMetricsReport.CONVERTER.fromResultSet(mockResultSet); + + assertEquals(TEST_REPORT_ID, result.getReportId()); + assertEquals(TEST_REALM_ID, result.getRealmId()); + assertEquals(TEST_CATALOG_ID, result.getCatalogId()); + assertEquals(TEST_CATALOG_NAME, result.getCatalogName()); + assertEquals(TEST_NAMESPACE, result.getNamespace()); + assertEquals(TEST_TABLE_NAME, result.getTableName()); + assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); + assertEquals(TEST_PRINCIPAL, result.getPrincipalName()); + assertEquals(TEST_REQUEST_ID, result.getRequestId()); + assertEquals(TEST_OTEL_TRACE_ID, result.getOtelTraceId()); + assertEquals(TEST_SNAPSHOT_ID, result.getSnapshotId()); + assertEquals(TEST_RESULT_DATA_FILES, result.getResultDataFiles()); + assertEquals(TEST_TOTAL_FILE_SIZE, result.getTotalFileSizeBytes()); + assertEquals(TEST_PLANNING_DURATION, result.getTotalPlanningDurationMs()); + assertEquals(TEST_METADATA, result.getMetadata()); + } + + @Test + public void testToMapWithH2DatabaseType() { + ModelScanMetricsReport report = createTestReport(); + + Map resultMap = report.toMap(DatabaseType.H2); + + assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); + assertEquals(TEST_REALM_ID, resultMap.get(ModelScanMetricsReport.REALM_ID)); + assertEquals(TEST_CATALOG_ID, resultMap.get(ModelScanMetricsReport.CATALOG_ID)); + 
assertEquals(TEST_CATALOG_NAME, resultMap.get(ModelScanMetricsReport.CATALOG_NAME)); + assertEquals(TEST_NAMESPACE, resultMap.get(ModelScanMetricsReport.NAMESPACE)); + assertEquals(TEST_TABLE_NAME, resultMap.get(ModelScanMetricsReport.TABLE_NAME_COL)); + assertEquals(TEST_TIMESTAMP_MS, resultMap.get(ModelScanMetricsReport.TIMESTAMP_MS)); + assertEquals(TEST_RESULT_DATA_FILES, resultMap.get(ModelScanMetricsReport.RESULT_DATA_FILES)); + assertEquals(TEST_METADATA, resultMap.get(ModelScanMetricsReport.METADATA)); + } + + @Test + public void testToMapWithPostgresType() { + ModelScanMetricsReport report = createTestReport(); + + Map resultMap = report.toMap(DatabaseType.POSTGRES); + + assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); + PGobject pgObject = (PGobject) resultMap.get(ModelScanMetricsReport.METADATA); + assertEquals("jsonb", pgObject.getType()); + assertEquals(TEST_METADATA, pgObject.getValue()); + } + + private ModelScanMetricsReport createTestReport() { + return ImmutableModelScanMetricsReport.builder() + .reportId(TEST_REPORT_ID) + .realmId(TEST_REALM_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .namespace(TEST_NAMESPACE) + .tableName(TEST_TABLE_NAME) + .timestampMs(TEST_TIMESTAMP_MS) + .principalName(TEST_PRINCIPAL) + .requestId(TEST_REQUEST_ID) + .otelTraceId(TEST_OTEL_TRACE_ID) + .snapshotId(TEST_SNAPSHOT_ID) + .resultDataFiles(TEST_RESULT_DATA_FILES) + .resultDeleteFiles(TEST_RESULT_DELETE_FILES) + .totalFileSizeBytes(TEST_TOTAL_FILE_SIZE) + .totalDataManifests(TEST_TOTAL_DATA_MANIFESTS) + .totalDeleteManifests(TEST_TOTAL_DELETE_MANIFESTS) + .scannedDataManifests(TEST_SCANNED_DATA_MANIFESTS) + .scannedDeleteManifests(TEST_SCANNED_DELETE_MANIFESTS) + .skippedDataManifests(TEST_SKIPPED_DATA_MANIFESTS) + .skippedDeleteManifests(TEST_SKIPPED_DELETE_MANIFESTS) + .skippedDataFiles(TEST_SKIPPED_DATA_FILES) + .skippedDeleteFiles(TEST_SKIPPED_DELETE_FILES) + .totalPlanningDurationMs(TEST_PLANNING_DURATION) 
+ .equalityDeleteFiles(TEST_EQUALITY_DELETE_FILES) + .positionalDeleteFiles(TEST_POSITIONAL_DELETE_FILES) + .indexedDeleteFiles(TEST_INDEXED_DELETE_FILES) + .totalDeleteFileSizeBytes(TEST_DELETE_FILE_SIZE) + .metadata(TEST_METADATA) + .build(); + } +} From 58563e0b1a95b07f86812bd9aaaf535f88e4b355 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 19:46:00 -0800 Subject: [PATCH 17/67] feat(persistence): Add JDBC persistence implementation for metrics (PR#4) This commit adds the JDBC layer for persisting metrics reports to the database: Implementation: - JdbcBasePersistenceImpl: Add writeScanMetricsReport() and writeCommitMetricsReport() methods - QueryGenerator: Add SQL generation for metrics tables (INSERT, SELECT, DELETE) - build.gradle.kts: Add jackson-databind and Iceberg API/core compile-only dependencies for metrics report conversion Tests: - MetricsReportPersistenceTest: Integration tests for database CRUD operations - Test insert/query/delete for scan metrics reports - Test insert/query/delete for commit metrics reports - Test querying by various filters (realm, catalog, table, time range) This builds on the schema and model classes from PR#3 and provides the actual database operations used by the metrics reporters. 
--- persistence/relational-jdbc/build.gradle.kts | 6 + .../jdbc/JdbcBasePersistenceImpl.java | 262 +++++++++ .../relational/jdbc/QueryGenerator.java | 26 +- .../jdbc/MetricsReportPersistenceTest.java | 508 ++++++++++++++++++ 4 files changed, 801 insertions(+), 1 deletion(-) create mode 100644 persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java diff --git a/persistence/relational-jdbc/build.gradle.kts b/persistence/relational-jdbc/build.gradle.kts index c3e4253727..3de2526dd3 100644 --- a/persistence/relational-jdbc/build.gradle.kts +++ b/persistence/relational-jdbc/build.gradle.kts @@ -29,10 +29,16 @@ dependencies { compileOnly(platform(libs.jackson.bom)) compileOnly("com.fasterxml.jackson.core:jackson-annotations") + compileOnly("com.fasterxml.jackson.core:jackson-databind") compileOnly(libs.jakarta.annotation.api) compileOnly(libs.jakarta.enterprise.cdi.api) compileOnly(libs.jakarta.inject.api) + // Iceberg API for metrics report conversion + compileOnly(platform(libs.iceberg.bom)) + compileOnly("org.apache.iceberg:iceberg-api") + compileOnly("org.apache.iceberg:iceberg-core") + implementation(libs.smallrye.common.annotation) // @Identifier implementation(libs.postgresql) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 9401df2dd0..35e7d9c30e 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -69,11 +69,15 @@ import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.apache.polaris.core.storage.StorageLocation; import 
org.apache.polaris.persistence.relational.jdbc.models.EntityNameLookupRecordConverter; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity; import org.apache.polaris.persistence.relational.jdbc.models.ModelEvent; import org.apache.polaris.persistence.relational.jdbc.models.ModelGrantRecord; import org.apache.polaris.persistence.relational.jdbc.models.ModelPolicyMappingRecord; import org.apache.polaris.persistence.relational.jdbc.models.ModelPrincipalAuthenticationData; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.SchemaVersion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -315,6 +319,264 @@ public void writeEvents(@Nonnull List events) { } } + /** + * Writes a scan metrics report to the database as a first-class entity. + * + * @param report the scan metrics report to persist + */ + public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { + try { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelScanMetricsReport.ALL_COLUMNS, + ModelScanMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Scan metrics report was not inserted."); + } + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); + } + } + + /** + * Writes a commit metrics report to the database as a first-class entity. 
+ * + * @param report the commit metrics report to persist + */ + public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { + try { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelCommitMetricsReport.ALL_COLUMNS, + ModelCommitMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Commit metrics report was not inserted."); + } + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves scan metrics reports for a specific table within a time range. + * + * @param catalogName the catalog name + * @param namespace the namespace + * @param tableName the table name + * @param startTimeMs start of time range (inclusive), or null for no lower bound + * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param limit maximum number of results to return + * @return list of scan metrics reports matching the criteria + */ + @Nonnull + public List queryScanMetricsReports( + @Nonnull String catalogName, + @Nonnull String namespace, + @Nonnull String tableName, + @Nullable Long startTimeMs, + @Nullable Long endTimeMs, + int limit) { + try { + StringBuilder whereClause = new StringBuilder(); + whereClause.append("realm_id = ? AND catalog_name = ? AND namespace = ? 
AND table_name = ?"); + List values = new ArrayList<>(List.of(realmId, catalogName, namespace, tableName)); + + if (startTimeMs != null) { + whereClause.append(" AND timestamp_ms >= ?"); + values.add(startTimeMs); + } + if (endTimeMs != null) { + whereClause.append(" AND timestamp_ms < ?"); + values.add(endTimeMs); + } + + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE " + + whereClause + + " ORDER BY timestamp_ms DESC LIMIT " + + limit; + + PreparedQuery query = new PreparedQuery(sql, values); + var results = + datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to query scan metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves commit metrics reports for a specific table within a time range. + * + * @param catalogName the catalog name + * @param namespace the namespace + * @param tableName the table name + * @param startTimeMs start of time range (inclusive), or null for no lower bound + * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param limit maximum number of results to return + * @return list of commit metrics reports matching the criteria + */ + @Nonnull + public List queryCommitMetricsReports( + @Nonnull String catalogName, + @Nonnull String namespace, + @Nonnull String tableName, + @Nullable Long startTimeMs, + @Nullable Long endTimeMs, + int limit) { + try { + List values = new ArrayList<>(List.of(realmId, catalogName, namespace, tableName)); + + StringBuilder whereClause = new StringBuilder(); + whereClause.append("realm_id = ? AND catalog_name = ? AND namespace = ? 
AND table_name = ?"); + + if (startTimeMs != null) { + whereClause.append(" AND timestamp_ms >= ?"); + values.add(startTimeMs); + } + if (endTimeMs != null) { + whereClause.append(" AND timestamp_ms < ?"); + values.add(endTimeMs); + } + + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE " + + whereClause + + " ORDER BY timestamp_ms DESC LIMIT " + + limit; + + PreparedQuery query = new PreparedQuery(sql, values); + var results = + datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to query commit metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves scan metrics reports by OpenTelemetry trace ID. + * + * @param traceId the OpenTelemetry trace ID + * @return list of scan metrics reports with the given trace ID + */ + @Nonnull + public List queryScanMetricsReportsByTraceId(@Nonnull String traceId) { + try { + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); + var results = + datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format( + "Failed to query scan metrics reports by trace ID due to %s", e.getMessage()), + e); + } + } + + /** + * Retrieves commit metrics reports by OpenTelemetry trace ID. 
+ * + * @param traceId the OpenTelemetry trace ID + * @return list of commit metrics reports with the given trace ID + */ + @Nonnull + public List queryCommitMetricsReportsByTraceId( + @Nonnull String traceId) { + try { + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); + var results = + datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format( + "Failed to query commit metrics reports by trace ID due to %s", e.getMessage()), + e); + } + } + + /** + * Deletes scan metrics reports older than the specified timestamp. + * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the number of reports deleted + */ + public int deleteScanMetricsReportsOlderThan(long olderThanMs) { + try { + String sql = + "DELETE FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND timestamp_ms < ?"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); + return datasourceOperations.executeUpdate(query); + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to delete old scan metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Deletes commit metrics reports older than the specified timestamp. 
+ * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the number of reports deleted + */ + public int deleteCommitMetricsReportsOlderThan(long olderThanMs) { + try { + String sql = + "DELETE FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND timestamp_ms < ?"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); + return datasourceOperations.executeUpdate(query); + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to delete old commit metrics reports due to %s", e.getMessage()), + e); + } + } + + /** + * Deletes all metrics reports (both scan and commit) older than the specified timestamp. + * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the total number of reports deleted (scan + commit) + */ + public int deleteAllMetricsReportsOlderThan(long olderThanMs) { + int scanDeleted = deleteScanMetricsReportsOlderThan(olderThanMs); + int commitDeleted = deleteCommitMetricsReportsOlderThan(olderThanMs); + return scanDeleted + commitDeleted; + } + @Override public void deleteEntity(@Nonnull PolarisCallContext callCtx, @Nonnull PolarisBaseEntity entity) { ModelEntity modelEntity = ModelEntity.fromEntity(entity, schemaVersion); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java index 485956ed85..423e965bd9 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java @@ -169,6 +169,30 @@ public static PreparedQuery generateInsertQuery( return new 
PreparedQuery(sql, finalValues); } + /** + * Generates an INSERT query for a given table without appending realm_id. Use this when the + * columns already include realm_id. + * + * @param allColumns Columns to insert values into (should already include realm_id if needed). + * @param tableName Target table name. + * @param values Values for each column (must match order of columns). + * @return INSERT query with value bindings. + */ + public static PreparedQuery generateInsertQueryWithoutRealmId( + @Nonnull List allColumns, @Nonnull String tableName, List values) { + String columns = String.join(", ", allColumns); + String placeholders = allColumns.stream().map(c -> "?").collect(Collectors.joining(", ")); + String sql = + "INSERT INTO " + + getFullyQualifiedTableName(tableName) + + " (" + + columns + + ") VALUES (" + + placeholders + + ")"; + return new PreparedQuery(sql, values); + } + /** * Builds an UPDATE query. * @@ -317,7 +341,7 @@ public static PreparedQuery generateOverlapQuery( return new PreparedQuery(query.sql(), where.parameters()); } - private static String getFullyQualifiedTableName(String tableName) { + static String getFullyQualifiedTableName(String tableName) { // TODO: make schema name configurable. return "POLARIS_SCHEMA." + tableName; } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java new file mode 100644 index 0000000000..252a19b920 --- /dev/null +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -0,0 +1,508 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc; + +import static org.apache.polaris.core.persistence.PrincipalSecretsGenerator.RANDOM_SECRETS; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.InputStream; +import java.sql.SQLException; +import java.util.Optional; +import java.util.UUID; +import javax.sql.DataSource; +import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.h2.jdbcx.JdbcConnectionPool; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +/** + * Integration tests for metrics report persistence using JdbcBasePersistenceImpl. Tests the full + * flow of writing scan and commit metrics reports to the database. 
+ */ +class MetricsReportPersistenceTest { + + private JdbcBasePersistenceImpl persistence; + private DatasourceOperations datasourceOperations; + + @BeforeEach + void setUp() throws SQLException { + DataSource dataSource = + JdbcConnectionPool.create( + "jdbc:h2:mem:test_metrics_" + UUID.randomUUID() + ";DB_CLOSE_DELAY=-1", "sa", ""); + + datasourceOperations = new DatasourceOperations(dataSource, new TestJdbcConfiguration()); + + // Execute schema v4 which includes metrics tables + ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); + InputStream scriptStream = classLoader.getResourceAsStream("h2/schema-v4.sql"); + datasourceOperations.executeScript(scriptStream); + + PolarisDiagnostics diagServices = new PolarisDefaultDiagServiceImpl(); + RealmContext realmContext = () -> "TEST_REALM"; + + persistence = + new JdbcBasePersistenceImpl( + diagServices, + datasourceOperations, + RANDOM_SECRETS, + Mockito.mock(), + realmContext.getRealmIdentifier(), + 4); + } + + @Test + void testWriteScanMetricsReport() { + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .schemaId(1) + .filterExpression("id > 100") + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(10240L) + .principalName("test-user") + .requestId("req-123") + .otelTraceId("trace-abc") + .otelSpanId("span-xyz") + 
.reportTraceId("report-trace-1") + .build(); + + // Should not throw + persistence.writeScanMetricsReport(report); + } + + @Test + void testWriteCommitMetricsReport() { + ModelCommitMetricsReport report = + ImmutableModelCommitMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .sequenceNumber(1L) + .operation("append") + .addedDataFiles(5L) + .removedDataFiles(0L) + .totalDataFiles(100L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(2L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(0L) + .totalRecords(50000L) + .addedFileSizeBytes(102400L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(5120000L) + .totalDurationMs(250L) + .attempts(1) + .principalName("test-user") + .requestId("req-456") + .otelTraceId("trace-def") + .otelSpanId("span-uvw") + .reportTraceId("report-trace-2") + .build(); + + // Should not throw + persistence.writeCommitMetricsReport(report); + } + + @Test + void testWriteMultipleScanReports() { + for (int i = 0; i < 10; i++) { + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("table_" + i) + .timestampMs(System.currentTimeMillis()) + .resultDataFiles((long) (i * 10)) + .resultDeleteFiles(0L) + .totalFileSizeBytes((long) (i * 1024)) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs((long) (i * 
10)) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + persistence.writeScanMetricsReport(report); + } + } + + @Test + void testWriteReportWithNullOptionalFields() { + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db") + .tableName("minimal_table") + .timestampMs(System.currentTimeMillis()) + // All optional fields left as null + .resultDataFiles(1L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(100L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + // Should not throw even with null optional fields + persistence.writeScanMetricsReport(report); + } + + @Test + void testQueryScanMetricsReportsByTable() { + long baseTime = System.currentTimeMillis(); + + // Write multiple reports for the same table + for (int i = 0; i < 5; i++) { + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("query_test_table") + .timestampMs(baseTime + i * 1000) + .resultDataFiles((long) i) + .resultDeleteFiles(0L) + .totalFileSizeBytes(100L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + 
.positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + persistence.writeScanMetricsReport(report); + } + + // Query all reports for the table + var results = + persistence.queryScanMetricsReports( + "test-catalog", "db.schema", "query_test_table", null, null, 10); + assertThat(results).hasSize(5); + + // Query with time range + var rangeResults = + persistence.queryScanMetricsReports( + "test-catalog", "db.schema", "query_test_table", baseTime + 1000, baseTime + 4000, 10); + assertThat(rangeResults).hasSize(3); + + // Query with limit + var limitedResults = + persistence.queryScanMetricsReports( + "test-catalog", "db.schema", "query_test_table", null, null, 2); + assertThat(limitedResults).hasSize(2); + } + + @Test + void testQueryScanMetricsReportsByTraceId() { + String traceId = "test-trace-" + UUID.randomUUID(); + + // Write a report with trace ID + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db") + .tableName("trace_test_table") + .timestampMs(System.currentTimeMillis()) + .otelTraceId(traceId) + .resultDataFiles(1L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(100L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + persistence.writeScanMetricsReport(report); + + // Query by trace ID + var results = persistence.queryScanMetricsReportsByTraceId(traceId); + assertThat(results).hasSize(1); + assertThat(results.get(0).getOtelTraceId()).isEqualTo(traceId); + } + + @Test + void testDeleteOldScanMetricsReports() { + // Create 
reports with different timestamps + long now = System.currentTimeMillis(); + long oneHourAgo = now - 3600_000; + long twoDaysAgo = now - 2 * 24 * 3600_000; + + // Create an old report (2 days ago) + ModelScanMetricsReport oldReport = + ImmutableModelScanMetricsReport.builder() + .reportId("old-report-" + UUID.randomUUID()) + .realmId("TEST_REALM") + .catalogId("catalog1") + .catalogName("test_catalog") + .namespace("test_namespace") + .tableName("test_table") + .timestampMs(twoDaysAgo) + .resultDataFiles(10L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(1000L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + persistence.writeScanMetricsReport(oldReport); + + // Create a recent report (1 hour ago) + ModelScanMetricsReport recentReport = + ImmutableModelScanMetricsReport.builder() + .reportId("recent-report-" + UUID.randomUUID()) + .realmId("TEST_REALM") + .catalogId("catalog1") + .catalogName("test_catalog") + .namespace("test_namespace") + .tableName("test_table") + .timestampMs(oneHourAgo) + .resultDataFiles(10L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(1000L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + persistence.writeScanMetricsReport(recentReport); + + // Delete reports older than 1 day + long oneDayAgo = now - 24 * 3600_000; + int deleted = persistence.deleteScanMetricsReportsOlderThan(oneDayAgo); 
+ + // Should have deleted the old report + assertThat(deleted).isEqualTo(1); + + // Query to verify only recent report remains + var results = + persistence.queryScanMetricsReports( + "test_catalog", "test_namespace", "test_table", null, null, 10); + assertThat(results).hasSize(1); + assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); + } + + @Test + void testDeleteOldCommitMetricsReports() { + // Create reports with different timestamps + long now = System.currentTimeMillis(); + long oneHourAgo = now - 3600_000; + long twoDaysAgo = now - 2 * 24 * 3600_000; + + // Create an old report (2 days ago) + ModelCommitMetricsReport oldReport = + ImmutableModelCommitMetricsReport.builder() + .reportId("old-commit-" + UUID.randomUUID()) + .realmId("TEST_REALM") + .catalogId("catalog1") + .catalogName("test_catalog") + .namespace("test_namespace") + .tableName("test_table") + .timestampMs(twoDaysAgo) + .snapshotId(100L) + .sequenceNumber(1L) + .operation("append") + .addedDataFiles(5L) + .removedDataFiles(0L) + .totalDataFiles(5L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(0L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(0L) + .totalRecords(1000L) + .addedFileSizeBytes(10000L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(10000L) + .totalDurationMs(50L) + .attempts(1) + .build(); + persistence.writeCommitMetricsReport(oldReport); + + // Create a recent report (1 hour ago) + ModelCommitMetricsReport recentReport = + ImmutableModelCommitMetricsReport.builder() + .reportId("recent-commit-" + UUID.randomUUID()) + .realmId("TEST_REALM") + .catalogId("catalog1") + .catalogName("test_catalog") + .namespace("test_namespace") + .tableName("test_table") + .timestampMs(oneHourAgo) + .snapshotId(101L) + .sequenceNumber(2L) + .operation("append") + .addedDataFiles(3L) + .removedDataFiles(0L) + 
.totalDataFiles(8L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(0L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(500L) + .removedRecords(0L) + .totalRecords(1500L) + .addedFileSizeBytes(5000L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(15000L) + .totalDurationMs(30L) + .attempts(1) + .build(); + persistence.writeCommitMetricsReport(recentReport); + + // Delete reports older than 1 day + long oneDayAgo = now - 24 * 3600_000; + int deleted = persistence.deleteCommitMetricsReportsOlderThan(oneDayAgo); + + // Should have deleted the old report + assertThat(deleted).isEqualTo(1); + + // Query to verify only recent report remains + var results = + persistence.queryCommitMetricsReports( + "test_catalog", "test_namespace", "test_table", null, null, 10); + assertThat(results).hasSize(1); + assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); + } + + private static class TestJdbcConfiguration implements RelationalJdbcConfiguration { + @Override + public Optional maxRetries() { + return Optional.of(1); + } + + @Override + public Optional maxDurationInMs() { + return Optional.of(100L); + } + + @Override + public Optional initialDelayInMs() { + return Optional.of(10L); + } + } +} From 86fe3d7b023021e64917d1cdcb6a53aa8b2cc03a Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 19:47:30 -0800 Subject: [PATCH 18/67] feat(reporting): Add metrics reporters infrastructure (PR#5) This commit adds a pluggable metrics reporting infrastructure: Reporter Interface & Implementations: - PolarisMetricsReporter: Base interface for all metrics reporters - PersistingMetricsReporter: Persists ScanReport/CommitReport to database - Extracts principal, trace context, and report data - Converts to ModelScanMetricsReport/ModelCommitMetricsReport - Uses JdbcBasePersistenceImpl for database operations - 
EventsMetricsReporter: Emits metrics as Polaris events - Useful for event-driven architectures and streaming - CompositeMetricsReporter: Combines multiple reporters - Allows logging + persistence + events simultaneously Configuration: - MetricsReportingConfiguration: SmallRye config for reporter selection - polaris.iceberg-metrics.reporting.type = logging|persistence|events|composite Integration: - ServiceProducers: CDI producer for configured reporter - IcebergCatalogAdapter: Integration with catalog metrics reporting Tests: - PersistingMetricsReporterTest: Unit tests for persistence reporter - EventsMetricsReporterTest: Unit tests for events reporter - CompositeMetricsReporterTest: Unit tests for composite reporter This builds on PR#4's JDBC implementation and provides the service-layer integration for metrics reporting. --- runtime/service/build.gradle.kts | 3 +- .../iceberg/IcebergCatalogAdapter.java | 3 + .../service/config/ServiceProducers.java | 60 ++++- .../reporting/CompositeMetricsReporter.java | 92 ++++++++ .../reporting/EventsMetricsReporter.java | 206 ++++++++++++++++++ .../MetricsReportingConfiguration.java | 68 ++++++ .../reporting/PersistingMetricsReporter.java | 188 ++++++++++++++++ .../CompositeMetricsReporterTest.java | 133 +++++++++++ .../reporting/EventsMetricsReporterTest.java | 156 +++++++++++++ .../PersistingMetricsReporterTest.java | 164 ++++++++++++++ 10 files changed, 1069 insertions(+), 4 deletions(-) create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java create mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java create mode 100644 
runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java create mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java diff --git a/runtime/service/build.gradle.kts b/runtime/service/build.gradle.kts index 11d1b7a481..134ea0e2db 100644 --- a/runtime/service/build.gradle.kts +++ b/runtime/service/build.gradle.kts @@ -31,7 +31,7 @@ dependencies { implementation(project(":polaris-api-iceberg-service")) implementation(project(":polaris-api-catalog-service")) - runtimeOnly(project(":polaris-relational-jdbc")) + implementation(project(":polaris-relational-jdbc")) implementation(project(":polaris-runtime-defaults")) implementation(project(":polaris-runtime-common")) @@ -62,6 +62,7 @@ dependencies { implementation("io.quarkus:quarkus-micrometer-registry-prometheus") implementation("io.quarkus:quarkus-oidc") implementation("io.quarkus:quarkus-opentelemetry") + implementation("io.quarkus:quarkus-scheduler") implementation("io.quarkus:quarkus-security") implementation("io.quarkus:quarkus-smallrye-context-propagation") implementation("io.quarkus:quarkus-smallrye-fault-tolerance") diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java index 59629a8267..a5bfe58f27 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java @@ -727,6 +727,9 @@ public Response reportMetrics( ReportMetricsRequest reportMetricsRequest, RealmContext realmContext, SecurityContext securityContext) { + // Validate that the caller is authenticated (consistent with other endpoints) + validatePrincipal(securityContext); + String catalogName = prefixParser.prefixToCatalogName(realmContext, 
prefix); Namespace ns = decodeNamespace(namespace); TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(table)); diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 121eb382c1..3ecfdc8074 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -31,6 +31,8 @@ import jakarta.enterprise.inject.Produces; import jakarta.inject.Singleton; import java.time.Clock; +import java.util.ArrayList; +import java.util.List; import java.util.stream.Collectors; import org.apache.polaris.core.PolarisCallContext; import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; @@ -79,6 +81,7 @@ import org.apache.polaris.service.ratelimiter.RateLimiterFilterConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketFactory; +import org.apache.polaris.service.reporting.CompositeMetricsReporter; import org.apache.polaris.service.reporting.MetricsReportingConfiguration; import org.apache.polaris.service.reporting.PolarisMetricsReporter; import org.apache.polaris.service.secrets.SecretsManagerConfiguration; @@ -273,7 +276,7 @@ public StsClientsPool stsClientsPool( */ public void maybeBootstrap( @Observes Startup event, - Bootstrapper bootstrapper, + MetaStoreManagerFactory factory, PersistenceConfiguration config, RealmContextConfiguration realmContextConfiguration) { var rootCredentialsSet = RootCredentialsSet.fromEnvironment(); @@ -287,7 +290,7 @@ public void maybeBootstrap( RootCredentialsSet.ENVIRONMENT_VARIABLE, RootCredentialsSet.SYSTEM_PROPERTY); - var result = bootstrapper.bootstrapRealms(realmIds, rootCredentialsSet); + var result = factory.bootstrapRealms(realmIds, rootCredentialsSet); 
result.forEach( (realm, secrets) -> { @@ -435,6 +438,57 @@ public void closeTaskExecutor(@Disposes @Identifier("task-executor") ManagedExec @ApplicationScoped public PolarisMetricsReporter metricsReporter( MetricsReportingConfiguration config, @Any Instance reporters) { - return reporters.select(Identifier.Literal.of(config.type())).get(); + String type = config.type(); + + if ("composite".equals(type)) { + List targets = config.targets(); + if (targets == null || targets.isEmpty()) { + LOGGER.warn( + "Composite metrics reporter configured but no targets specified. " + + "Falling back to default reporter."); + return reporters.select(Identifier.Literal.of("default")).get(); + } + + List delegates = new ArrayList<>(); + for (String target : targets) { + if (target == null || target.isBlank()) { + continue; + } + String trimmedTarget = target.trim(); + // Avoid infinite recursion - don't allow composite as a target + if ("composite".equals(trimmedTarget)) { + LOGGER.warn("Ignoring 'composite' as a target - would cause infinite recursion"); + continue; + } + try { + PolarisMetricsReporter delegate = + reporters.select(Identifier.Literal.of(trimmedTarget)).get(); + delegates.add(delegate); + LOGGER.info("Added metrics reporter target: {}", trimmedTarget); + } catch (Exception e) { + LOGGER.error( + "Failed to instantiate metrics reporter for target '{}': {}", + trimmedTarget, + e.getMessage()); + } + } + + if (delegates.isEmpty()) { + LOGGER.warn("No valid targets for composite reporter. Falling back to default reporter."); + return reporters.select(Identifier.Literal.of("default")).get(); + } + + return new CompositeMetricsReporter(delegates); + } + + try { + return reporters.select(Identifier.Literal.of(type)).get(); + } catch (Exception e) { + LOGGER.error( + "Failed to instantiate metrics reporter for type '{}': {}. 
Falling back to default.", + type, + e.getMessage()); + return reporters.select(Identifier.Literal.of("default")).get(); + } } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java new file mode 100644 index 0000000000..fbad83128e --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import java.util.List; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.MetricsReport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A composite metrics reporter that delegates to multiple child reporters. This allows metrics to + * be sent to multiple destinations simultaneously, such as both the events table and dedicated + * metrics tables. + * + *

    To enable this reporter, set the configuration: + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: composite
    + *       targets:
    + *         - events       # Write to events table
    + *         - persistence  # Write to dedicated tables
    + * 
    + * + *

    The composite reporter will call each configured target reporter in order. If one reporter + * fails, the others will still be called. + */ +public class CompositeMetricsReporter implements PolarisMetricsReporter { + + private static final Logger LOGGER = LoggerFactory.getLogger(CompositeMetricsReporter.class); + + private final List delegates; + + /** + * Creates a composite reporter with the given delegate reporters. + * + * @param delegates the list of reporters to delegate to + */ + public CompositeMetricsReporter(List delegates) { + this.delegates = List.copyOf(delegates); + LOGGER.info( + "CompositeMetricsReporter initialized with {} delegate(s): {}", + delegates.size(), + delegates.stream().map(r -> r.getClass().getSimpleName()).toList()); + } + + @Override + public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { + for (PolarisMetricsReporter delegate : delegates) { + try { + delegate.reportMetric(catalogName, table, metricsReport); + } catch (Exception e) { + LOGGER.error( + "Delegate reporter {} failed for table {}.{}: {}", + delegate.getClass().getSimpleName(), + catalogName, + table, + e.getMessage(), + e); + // Continue with other delegates even if one fails + } + } + } + + /** + * Returns the list of delegate reporters. + * + * @return unmodifiable list of delegates + */ + public List getDelegates() { + return delegates; + } +} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java new file mode 100644 index 0000000000..686f2dde82 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanContext; +import io.quarkus.security.identity.SecurityIdentity; +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.inject.Instance; +import jakarta.inject.Inject; +import java.security.Principal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.entity.PolarisEvent; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A metrics reporter that persists scan and commit reports to the events table as JSON. 
This + * provides a unified audit trail where metrics are stored alongside other catalog events. + * + *

    To enable this reporter, set the configuration: + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: events
    + * 
    + * + *

    Or use it as part of a composite reporter: + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: composite
    + *       targets:
    + *         - events
    + *         - persistence
    + * 
    + */ +@ApplicationScoped +@Identifier("events") +public class EventsMetricsReporter implements PolarisMetricsReporter { + + private static final Logger LOGGER = LoggerFactory.getLogger(EventsMetricsReporter.class); + + public static final String EVENT_TYPE_SCAN_REPORT = "ScanReport"; + public static final String EVENT_TYPE_COMMIT_REPORT = "CommitReport"; + + private final MetaStoreManagerFactory metaStoreManagerFactory; + private final RealmContext realmContext; + private final ObjectMapper objectMapper; + private final Instance securityIdentityInstance; + + @Inject + public EventsMetricsReporter( + MetaStoreManagerFactory metaStoreManagerFactory, + RealmContext realmContext, + ObjectMapper objectMapper, + Instance securityIdentityInstance) { + this.metaStoreManagerFactory = metaStoreManagerFactory; + this.realmContext = realmContext; + this.objectMapper = objectMapper; + this.securityIdentityInstance = securityIdentityInstance; + } + + @Override + public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { + try { + String eventType; + PolarisEvent.ResourceType resourceType = PolarisEvent.ResourceType.TABLE; + String resourceIdentifier = table.toString(); + + if (metricsReport instanceof ScanReport) { + eventType = EVENT_TYPE_SCAN_REPORT; + } else if (metricsReport instanceof CommitReport) { + eventType = EVENT_TYPE_COMMIT_REPORT; + } else { + LOGGER.warn("Unknown metrics report type: {}", metricsReport.getClass().getName()); + return; + } + + // Extract principal name from security context + String principalName = extractPrincipalName(); + + // Extract OpenTelemetry trace context + String otelTraceId = null; + String otelSpanId = null; + Span currentSpan = Span.current(); + if (currentSpan != null) { + SpanContext spanContext = currentSpan.getSpanContext(); + if (spanContext != null && spanContext.isValid()) { + otelTraceId = spanContext.getTraceId(); + otelSpanId = spanContext.getSpanId(); + } + } + + // Serialize the 
metrics report and add trace context to additional properties + Map additionalProps = new HashMap<>(); + additionalProps.put("metricsReport", serializeMetricsReportToMap(metricsReport)); + if (otelTraceId != null) { + additionalProps.put("otelTraceId", otelTraceId); + } + if (otelSpanId != null) { + additionalProps.put("otelSpanId", otelSpanId); + } + String additionalPropsJson = serializeToJson(additionalProps); + + PolarisEvent event = + new PolarisEvent( + catalogName, + UUID.randomUUID().toString(), + null, // requestId - could be extracted from context if available + eventType, + System.currentTimeMillis(), + principalName, + resourceType, + resourceIdentifier); + event.setAdditionalProperties(additionalPropsJson); + + // Get the persistence session for the current realm and write the event + BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); + session.writeEvents(List.of(event)); + + LOGGER.debug("Persisted {} event for table {}.{}", eventType, catalogName, table); + } catch (Exception e) { + LOGGER.error( + "Failed to persist metrics event for table {}.{}: {}", + catalogName, + table, + e.getMessage(), + e); + } + } + + /** + * Extracts the principal name from the current security context. 
+ * + * @return the principal name, or null if not available + */ + private String extractPrincipalName() { + try { + if (securityIdentityInstance.isResolvable()) { + SecurityIdentity identity = securityIdentityInstance.get(); + if (identity != null && !identity.isAnonymous()) { + Principal principal = identity.getPrincipal(); + if (principal != null) { + return principal.getName(); + } + } + } + } catch (Exception e) { + LOGGER.trace("Could not extract principal name from security context: {}", e.getMessage()); + } + return null; + } + + private Object serializeMetricsReportToMap(MetricsReport metricsReport) { + try { + String json = objectMapper.writeValueAsString(metricsReport); + return objectMapper.readValue(json, Object.class); + } catch (JsonProcessingException e) { + LOGGER.warn("Failed to serialize metrics report: {}", e.getMessage()); + return Map.of(); + } + } + + private String serializeToJson(Object obj) { + try { + return objectMapper.writeValueAsString(obj); + } catch (JsonProcessingException e) { + LOGGER.warn("Failed to serialize to JSON: {}", e.getMessage()); + return "{}"; + } + } +} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java index 3d60302ab3..a041b3170c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java @@ -20,9 +20,77 @@ import io.smallrye.config.ConfigMapping; import io.smallrye.config.WithDefault; +import java.time.Duration; +import java.util.List; @ConfigMapping(prefix = "polaris.iceberg-metrics.reporting") public interface MetricsReportingConfiguration { + /** + * The type of metrics reporter to use. Supported values: + * + *
      + *
    • {@code default} - Log metrics to console only (no persistence) + *
    • {@code events} - Persist metrics to the events table as JSON + *
    • {@code persistence} - Persist metrics to dedicated tables (scan_metrics_report, + * commit_metrics_report) + *
    • {@code composite} - Use multiple reporters based on the {@link #targets()} configuration + *
    + * + * @return the reporter type + */ @WithDefault("default") String type(); + + /** + * List of reporter targets to use when {@link #type()} is set to {@code composite}. Each target + * corresponds to a reporter type: {@code default}, {@code events}, or {@code persistence}. + * + *

    Example configuration: + * + *

    +   * polaris:
    +   *   iceberg-metrics:
    +   *     reporting:
    +   *       type: composite
    +   *       targets:
    +   *         - events
    +   *         - persistence
    +   * 
    + * + * @return list of reporter targets, empty if not using composite type + */ + default List targets() { + return List.of(); + } + + /** Configuration for metrics retention and cleanup. */ + RetentionConfig retention(); + + interface RetentionConfig { + /** + * Whether automatic cleanup of old metrics reports is enabled. Default is false (disabled). + * + * @return true if cleanup is enabled + */ + @WithDefault("false") + boolean enabled(); + + /** + * How long to retain metrics reports before they are eligible for cleanup. Default is 30 days. + * Supports ISO-8601 duration format (e.g., "P30D" for 30 days, "PT24H" for 24 hours). + * + * @return the retention period + */ + @WithDefault("P30D") + Duration retentionPeriod(); + + /** + * How often to run the cleanup job. Default is every 6 hours. Supports ISO-8601 duration + * format. + * + * @return the cleanup interval + */ + @WithDefault("PT6H") + Duration cleanupInterval(); + } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java new file mode 100644 index 0000000000..136a84d045 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanContext; +import io.quarkus.security.identity.SecurityIdentity; +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.inject.Instance; +import jakarta.inject.Inject; +import java.security.Principal; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; +import org.apache.polaris.persistence.relational.jdbc.models.MetricsReportConverter; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A metrics reporter that persists scan and commit reports as first-class entities in the database. + * This provides better queryability and analytics capabilities compared to storing metrics as + * generic events. + * + *

    To enable this reporter, set the configuration: + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: persistence
    + * 
    + * + *

    Note: This reporter requires the relational-jdbc persistence backend. If a different + * persistence backend is configured, metrics will be logged but not persisted. + */ +@ApplicationScoped +@Identifier("persistence") +public class PersistingMetricsReporter implements PolarisMetricsReporter { + + private static final Logger LOGGER = LoggerFactory.getLogger(PersistingMetricsReporter.class); + + private final MetaStoreManagerFactory metaStoreManagerFactory; + private final RealmContext realmContext; + private final Instance securityIdentityInstance; + + @Inject + public PersistingMetricsReporter( + MetaStoreManagerFactory metaStoreManagerFactory, + RealmContext realmContext, + Instance securityIdentityInstance) { + this.metaStoreManagerFactory = metaStoreManagerFactory; + this.realmContext = realmContext; + this.securityIdentityInstance = securityIdentityInstance; + } + + @Override + public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { + try { + String realmId = realmContext.getRealmIdentifier(); + String catalogId = catalogName; // Using catalog name as ID for now + String namespace = table.namespace().toString(); + + // Extract principal name from security context + String principalName = extractPrincipalName(); + String requestId = null; + + // Extract OpenTelemetry trace context from the current span + String otelTraceId = null; + String otelSpanId = null; + Span currentSpan = Span.current(); + if (currentSpan != null) { + SpanContext spanContext = currentSpan.getSpanContext(); + if (spanContext != null && spanContext.isValid()) { + otelTraceId = spanContext.getTraceId(); + otelSpanId = spanContext.getSpanId(); + LOGGER.trace( + "Captured OpenTelemetry context: traceId={}, spanId={}", otelTraceId, otelSpanId); + } + } + + // Get the persistence session for the current realm + BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); + + // Check if the session is a 
JdbcBasePersistenceImpl (supports metrics persistence) + if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { + LOGGER.warn( + "Metrics persistence is only supported with relational-jdbc backend. " + + "Current backend: {}. Logging metrics instead.", + session.getClass().getSimpleName()); + LOGGER.info("{}.{}: {}", catalogName, table, metricsReport); + return; + } + + if (metricsReport instanceof ScanReport scanReport) { + ModelScanMetricsReport modelReport = + MetricsReportConverter.fromScanReport( + scanReport, + realmId, + catalogId, + catalogName, + namespace, + principalName, + requestId, + otelTraceId, + otelSpanId); + jdbcPersistence.writeScanMetricsReport(modelReport); + LOGGER.debug( + "Persisted scan metrics report {} for table {}.{}", + modelReport.getReportId(), + catalogName, + table); + } else if (metricsReport instanceof CommitReport commitReport) { + ModelCommitMetricsReport modelReport = + MetricsReportConverter.fromCommitReport( + commitReport, + realmId, + catalogId, + catalogName, + namespace, + principalName, + requestId, + otelTraceId, + otelSpanId); + jdbcPersistence.writeCommitMetricsReport(modelReport); + LOGGER.debug( + "Persisted commit metrics report {} for table {}.{}", + modelReport.getReportId(), + catalogName, + table); + } else { + LOGGER.warn("Unknown metrics report type: {}", metricsReport.getClass().getName()); + } + } catch (Exception e) { + LOGGER.error( + "Failed to persist metrics report for table {}.{}: {}", + catalogName, + table, + e.getMessage(), + e); + } + } + + /** + * Extracts the principal name from the current security context. 
+ * + * @return the principal name, or null if not available + */ + private String extractPrincipalName() { + try { + if (securityIdentityInstance.isResolvable()) { + SecurityIdentity identity = securityIdentityInstance.get(); + if (identity != null && !identity.isAnonymous()) { + Principal principal = identity.getPrincipal(); + if (principal != null) { + return principal.getName(); + } + } + } + } catch (Exception e) { + LOGGER.trace("Could not extract principal name from security context: {}", e.getMessage()); + } + return null; + } +} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java new file mode 100644 index 0000000000..a4e67248be --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.reporting; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import java.util.List; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.ScanReport; +import org.junit.jupiter.api.Test; + +class CompositeMetricsReporterTest { + + @Test + void testDelegatesToAllReporters() { + PolarisMetricsReporter reporter1 = mock(PolarisMetricsReporter.class); + PolarisMetricsReporter reporter2 = mock(PolarisMetricsReporter.class); + PolarisMetricsReporter reporter3 = mock(PolarisMetricsReporter.class); + + CompositeMetricsReporter composite = + new CompositeMetricsReporter(List.of(reporter1, reporter2, reporter3)); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + composite.reportMetric("test-catalog", table, scanReport); + + verify(reporter1).reportMetric("test-catalog", table, scanReport); + verify(reporter2).reportMetric("test-catalog", table, scanReport); + verify(reporter3).reportMetric("test-catalog", table, scanReport); + } + + @Test + void testContinuesOnDelegateFailure() { + PolarisMetricsReporter reporter1 = mock(PolarisMetricsReporter.class); + PolarisMetricsReporter reporter2 = mock(PolarisMetricsReporter.class); + PolarisMetricsReporter reporter3 = mock(PolarisMetricsReporter.class); + + // Make reporter2 throw an exception + doThrow(new RuntimeException("Reporter 2 failed")) + .when(reporter2) + .reportMetric(any(), any(), any()); + + CompositeMetricsReporter composite = + new CompositeMetricsReporter(List.of(reporter1, reporter2, reporter3)); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + // Should not throw + 
composite.reportMetric("test-catalog", table, scanReport); + + // All reporters should still be called + verify(reporter1).reportMetric("test-catalog", table, scanReport); + verify(reporter2).reportMetric("test-catalog", table, scanReport); + verify(reporter3).reportMetric("test-catalog", table, scanReport); + } + + @Test + void testEmptyDelegatesList() { + CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of()); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + // Should not throw + composite.reportMetric("test-catalog", table, scanReport); + + assertThat(composite.getDelegates()).isEmpty(); + } + + @Test + void testSingleDelegate() { + PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); + CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + composite.reportMetric("test-catalog", table, scanReport); + + verify(reporter).reportMetric("test-catalog", table, scanReport); + assertThat(composite.getDelegates()).hasSize(1); + } + + @Test + void testGetDelegatesReturnsUnmodifiableList() { + PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); + CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); + + List delegates = composite.getDelegates(); + + // Should be unmodifiable + assertThat(delegates).hasSize(1); + org.junit.jupiter.api.Assertions.assertThrows( + UnsupportedOperationException.class, + () -> delegates.add(mock(PolarisMetricsReporter.class))); + } + + @Test + void testNullMetricsReportDoesNotThrow() { + PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); + CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); + + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + // Should not throw 
even with null report + composite.reportMetric("test-catalog", table, null); + + verify(reporter).reportMetric(eq("test-catalog"), eq(table), eq(null)); + } +} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java new file mode 100644 index 0000000000..c36d8a76c9 --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.reporting; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.quarkus.security.identity.SecurityIdentity; +import jakarta.enterprise.inject.Instance; +import java.util.List; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.entity.PolarisEvent; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +class EventsMetricsReporterTest { + + private MetaStoreManagerFactory metaStoreManagerFactory; + private RealmContext realmContext; + private BasePersistence persistence; + private ObjectMapper objectMapper; + + @SuppressWarnings("unchecked") + private Instance securityIdentityInstance = mock(Instance.class); + + private EventsMetricsReporter reporter; + + @BeforeEach + void setUp() { + metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); + realmContext = mock(RealmContext.class); + persistence = mock(BasePersistence.class); + objectMapper = new ObjectMapper(); + + when(realmContext.getRealmIdentifier()).thenReturn("test-realm"); + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(persistence); + when(securityIdentityInstance.isResolvable()).thenReturn(false); + + reporter = + new EventsMetricsReporter( + metaStoreManagerFactory, realmContext, objectMapper, securityIdentityInstance); + } + + @Test + 
void testReportScanMetrics() { + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, scanReport); + + @SuppressWarnings("unchecked") + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + verify(persistence).writeEvents(captor.capture()); + + List events = captor.getValue(); + assertThat(events).hasSize(1); + + PolarisEvent event = events.get(0); + assertThat(event.getEventType()).isEqualTo(EventsMetricsReporter.EVENT_TYPE_SCAN_REPORT); + assertThat(event.getCatalogId()).isEqualTo("test-catalog"); + assertThat(event.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); + assertThat(event.getResourceIdentifier()).isEqualTo("db.test_table"); + } + + @Test + void testReportCommitMetrics() { + CommitReport commitReport = mock(CommitReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, commitReport); + + @SuppressWarnings("unchecked") + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + verify(persistence).writeEvents(captor.capture()); + + List events = captor.getValue(); + assertThat(events).hasSize(1); + + PolarisEvent event = events.get(0); + assertThat(event.getEventType()).isEqualTo(EventsMetricsReporter.EVENT_TYPE_COMMIT_REPORT); + assertThat(event.getCatalogId()).isEqualTo("test-catalog"); + assertThat(event.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); + assertThat(event.getResourceIdentifier()).isEqualTo("db.test_table"); + } + + @Test + void testUnknownMetricsReportTypeIsIgnored() { + MetricsReport unknownReport = mock(MetricsReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, unknownReport); + + verify(persistence, never()).writeEvents(any()); + } + + @Test + void testEventContainsSerializedMetrics() { + // Create a mock ScanReport + ScanReport 
scanReport = mock(ScanReport.class); + when(scanReport.tableName()).thenReturn("test_table"); + when(scanReport.snapshotId()).thenReturn(12345L); + + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, scanReport); + + @SuppressWarnings("unchecked") + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + verify(persistence).writeEvents(captor.capture()); + + PolarisEvent event = captor.getValue().get(0); + String additionalProps = event.getAdditionalProperties(); + // Should contain JSON (at minimum an empty object or serialized report) + assertThat(additionalProps).isNotNull(); + } + + @Test + void testPersistenceErrorDoesNotThrow() { + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + when(metaStoreManagerFactory.getOrCreateSession(any())) + .thenThrow(new RuntimeException("Database error")); + + // Should not throw + reporter.reportMetric("test-catalog", table, scanReport); + } +} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java new file mode 100644 index 0000000000..64740c5bfa --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.quarkus.security.identity.SecurityIdentity; +import jakarta.enterprise.inject.Instance; +import java.security.Principal; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +class PersistingMetricsReporterTest { + + private MetaStoreManagerFactory metaStoreManagerFactory; + private RealmContext realmContext; + private JdbcBasePersistenceImpl jdbcPersistence; + private BasePersistence nonJdbcPersistence; + + @SuppressWarnings("unchecked") + private Instance securityIdentityInstance = mock(Instance.class); + + private PersistingMetricsReporter reporter; + + @BeforeEach + void setUp() { 
+ metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); + realmContext = mock(RealmContext.class); + jdbcPersistence = mock(JdbcBasePersistenceImpl.class); + nonJdbcPersistence = mock(BasePersistence.class); + + when(realmContext.getRealmIdentifier()).thenReturn("test-realm"); + when(securityIdentityInstance.isResolvable()).thenReturn(false); + + reporter = + new PersistingMetricsReporter( + metaStoreManagerFactory, realmContext, securityIdentityInstance); + } + + @Test + void testReportScanMetricsWithJdbcBackend() { + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); + + ScanReport scanReport = mock(ScanReport.class); + when(scanReport.tableName()).thenReturn("test_table"); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, scanReport); + + verify(jdbcPersistence).writeScanMetricsReport(any(ModelScanMetricsReport.class)); + } + + @Test + void testReportCommitMetricsWithJdbcBackend() { + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); + + CommitReport commitReport = mock(CommitReport.class); + when(commitReport.tableName()).thenReturn("test_table"); + when(commitReport.snapshotId()).thenReturn(12345L); + when(commitReport.operation()).thenReturn("append"); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, commitReport); + + verify(jdbcPersistence).writeCommitMetricsReport(any(ModelCommitMetricsReport.class)); + } + + @Test + void testFallbackToLoggingWithNonJdbcBackend() { + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(nonJdbcPersistence); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + // Should not throw, just log + reporter.reportMetric("test-catalog", table, scanReport); + + // Verify no JDBC methods were called + verify(jdbcPersistence, 
never()).writeScanMetricsReport(any()); + } + + @Test + void testUnknownMetricsReportTypeIsIgnored() { + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); + + MetricsReport unknownReport = mock(MetricsReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, unknownReport); + + verify(jdbcPersistence, never()).writeScanMetricsReport(any()); + verify(jdbcPersistence, never()).writeCommitMetricsReport(any()); + } + + @Test + void testPrincipalNameExtraction() { + when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); + + // Set up security identity with a principal + SecurityIdentity identity = mock(SecurityIdentity.class); + Principal principal = mock(Principal.class); + when(principal.getName()).thenReturn("test-user"); + when(identity.isAnonymous()).thenReturn(false); + when(identity.getPrincipal()).thenReturn(principal); + when(securityIdentityInstance.isResolvable()).thenReturn(true); + when(securityIdentityInstance.get()).thenReturn(identity); + + ScanReport scanReport = mock(ScanReport.class); + when(scanReport.tableName()).thenReturn("test_table"); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + reporter.reportMetric("test-catalog", table, scanReport); + + ArgumentCaptor captor = + ArgumentCaptor.forClass(ModelScanMetricsReport.class); + verify(jdbcPersistence).writeScanMetricsReport(captor.capture()); + + // The principal name should be captured in the report + // Note: The actual assertion depends on how the model is built + } + + @Test + void testPersistenceErrorDoesNotThrow() { + when(metaStoreManagerFactory.getOrCreateSession(any())) + .thenThrow(new RuntimeException("Database error")); + + ScanReport scanReport = mock(ScanReport.class); + TableIdentifier table = TableIdentifier.of("db", "test_table"); + + // Should not throw + reporter.reportMetric("test-catalog", table, scanReport); + } +} From 
0a0ffdcf585d68fd15877a67af7e58ee360330b2 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 19:47:53 -0800 Subject: [PATCH 19/67] feat(cleanup): Add metrics cleanup service and documentation (PR#6) This commit adds the cleanup service and documentation: MetricsReportCleanupService: - Scheduled service for automatic cleanup of old metrics reports - Configurable retention period (default: 30 days) - Runs periodically to delete expired scan and commit reports - Prevents database growth from unbounded metrics accumulation Configuration options: - polaris.iceberg-metrics.cleanup.enabled: Enable/disable cleanup - polaris.iceberg-metrics.cleanup.retention-days: Days to retain reports - polaris.iceberg-metrics.cleanup.schedule: Cron expression for cleanup runs Documentation: - telemetry.md: Updated documentation for metrics persistence feature - Configuration reference - Usage examples - Architecture overview This completes the metrics persistence feature by adding operational tooling for production deployments. --- .../MetricsReportCleanupService.java | 198 ++++++++++++++++++ site/content/in-dev/unreleased/telemetry.md | 179 ++++++++++++++++ 2 files changed, 377 insertions(+) create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java new file mode 100644 index 0000000000..04bbca6365 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.quarkus.runtime.Startup; +import io.quarkus.scheduler.Scheduled; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; +import org.apache.polaris.service.context.RealmContextConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scheduled service that cleans up old metrics reports based on the configured retention policy. + * + *

    This service runs periodically and deletes metrics reports that are older than the configured + * retention period. It only operates when the persistence backend is relational-jdbc. + * + *

    Configuration example: + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: persistence
    + *       retention:
    + *         enabled: true
    + *         retention-period: P30D  # 30 days
    + *         cleanup-interval: PT6H  # every 6 hours
    + * 
    + */ +@ApplicationScoped +@Startup +public class MetricsReportCleanupService { + + private static final Logger LOGGER = LoggerFactory.getLogger(MetricsReportCleanupService.class); + + private final MetricsReportingConfiguration config; + private final MetaStoreManagerFactory metaStoreManagerFactory; + private final RealmContextConfiguration realmContextConfiguration; + private final AtomicBoolean running = new AtomicBoolean(false); + + @Inject + public MetricsReportCleanupService( + MetricsReportingConfiguration config, + MetaStoreManagerFactory metaStoreManagerFactory, + RealmContextConfiguration realmContextConfiguration) { + this.config = config; + this.metaStoreManagerFactory = metaStoreManagerFactory; + this.realmContextConfiguration = realmContextConfiguration; + + if (config.retention().enabled()) { + LOGGER.info( + "Metrics report cleanup enabled with retention period: {}, cleanup interval: {}", + config.retention().retentionPeriod(), + config.retention().cleanupInterval()); + } else { + LOGGER.debug("Metrics report cleanup is disabled"); + } + } + + /** + * Scheduled cleanup job that runs at the configured interval. The actual interval is configured + * via the retention.cleanup-interval property. 
+ */ + @Scheduled(every = "${polaris.iceberg-metrics.reporting.retention.cleanup-interval:6h}") + public void cleanupOldMetricsReports() { + if (!config.retention().enabled()) { + LOGGER.trace("Metrics cleanup is disabled, skipping"); + return; + } + + // Prevent concurrent runs + if (!running.compareAndSet(false, true)) { + LOGGER.debug("Cleanup already in progress, skipping this run"); + return; + } + + try { + performCleanup(); + } finally { + running.set(false); + } + } + + private void performCleanup() { + Duration retentionPeriod = config.retention().retentionPeriod(); + long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli(); + List realmIds = realmContextConfiguration.realms(); + + LOGGER.info( + "Starting metrics report cleanup across {} realm(s). Deleting reports older than {} (cutoff: {})", + realmIds.size(), + retentionPeriod, + Instant.ofEpochMilli(cutoffTimestamp)); + + int totalDeleted = 0; + for (String realmId : realmIds) { + try { + int deletedCount = cleanupForRealm(realmId, cutoffTimestamp); + if (deletedCount > 0) { + LOGGER.info("Deleted {} old metrics reports from realm '{}'", deletedCount, realmId); + totalDeleted += deletedCount; + } else { + LOGGER.debug("No old metrics reports to delete in realm '{}'", realmId); + } + } catch (Exception e) { + LOGGER.error( + "Failed to cleanup old metrics reports for realm '{}': {}", realmId, e.getMessage(), e); + } + } + + if (totalDeleted > 0) { + LOGGER.info("Total deleted metrics reports across all realms: {}", totalDeleted); + } + } + + private int cleanupForRealm(String realmId, long cutoffTimestamp) { + RealmContext realmContext = () -> realmId; + BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); + + if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { + LOGGER.debug( + "Metrics cleanup is only supported with relational-jdbc backend. 
" + + "Current backend: {} for realm '{}'", + session.getClass().getSimpleName(), + realmId); + return 0; + } + + return jdbcPersistence.deleteAllMetricsReportsOlderThan(cutoffTimestamp); + } + + /** + * Manually trigger a cleanup across all realms. This can be called from an admin endpoint or for + * testing. + * + * @return the total number of reports deleted across all realms, or -1 if cleanup is disabled or + * failed + */ + public int triggerCleanup() { + if (!config.retention().enabled()) { + LOGGER.warn("Cannot trigger cleanup: retention is disabled"); + return -1; + } + + if (!running.compareAndSet(false, true)) { + LOGGER.warn("Cannot trigger cleanup: cleanup already in progress"); + return -1; + } + + try { + Duration retentionPeriod = config.retention().retentionPeriod(); + long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli(); + List realmIds = realmContextConfiguration.realms(); + + int totalDeleted = 0; + for (String realmId : realmIds) { + try { + int deletedCount = cleanupForRealm(realmId, cutoffTimestamp); + totalDeleted += deletedCount; + } catch (Exception e) { + LOGGER.error("Failed to cleanup metrics for realm '{}': {}", realmId, e.getMessage(), e); + } + } + return totalDeleted; + } catch (Exception e) { + LOGGER.error("Failed to trigger cleanup: {}", e.getMessage(), e); + return -1; + } finally { + running.set(false); + } + } +} diff --git a/site/content/in-dev/unreleased/telemetry.md b/site/content/in-dev/unreleased/telemetry.md index fc1a1336ce..7edcc2d603 100644 --- a/site/content/in-dev/unreleased/telemetry.md +++ b/site/content/in-dev/unreleased/telemetry.md @@ -191,6 +191,185 @@ polaris.log.mdc.region=us-west-2 MDC context is propagated across threads, including in `TaskExecutor` threads. +## Compute Client Audit Reporting + +Polaris supports end-to-end audit correlation between catalog operations, credential vending, and +compute engine metrics reports. 
This enables organizations to trace data access from the initial +catalog request through to actual S3/GCS/Azure storage access. + +### Metrics Reporting Endpoint + +Compute engines can report scan and commit metrics to Polaris using the standard Iceberg REST +Catalog metrics endpoint: + +``` +POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics +``` + +**Request Body**: A `ReportMetricsRequest` containing either a `ScanReport` or `CommitReport`: + +```json +{ + "report-type": "scan-report", + "table-name": "my_table", + "snapshot-id": 123456789, + "schema-id": 0, + "projected-field-ids": [1, 2, 3], + "projected-field-names": ["id", "name", "value"], + "filter": {"type": "always-true"}, + "metrics": { + "result-data-files": {"unit": "count", "value": 10}, + "total-file-size-bytes": {"unit": "bytes", "value": 1048576} + }, + "metadata": { + "trace-id": "abcdef1234567890abcdef1234567890", + "client-app": "spark-3.5" + } +} +``` + +**Response**: `204 No Content` on success. + +The `metadata` map in the report can contain a `trace-id` for correlation with other audit events. +This trace ID is extracted and stored in the event's `additional_properties` with a `report.` prefix. + +### Trace Correlation + +When OpenTelemetry is enabled, Polaris captures the `trace_id` at multiple points: + +1. **Catalog Operations**: Events like `loadTable`, `createTable` include the OpenTelemetry trace + context in their metadata. +2. **Credential Vending**: When AWS STS session tags are enabled, the `trace_id` is included as a + session tag (`polaris:trace_id`) in the vended credentials. This appears in AWS CloudTrail logs. +3. **Metrics Reports**: When compute engines report scan/commit metrics back to Polaris, the + `reportMetrics` events capture both the OpenTelemetry trace context from HTTP headers and any + `trace-id` passed in the report's `metadata` map. 
+ +### Enabling Session Tags for AWS + +To enable session tags (including trace_id) in AWS STS credentials, set the following feature flag: + +```properties +polaris.features."INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL"=true +``` + +This adds the following tags to all STS AssumeRole requests: + +- `polaris:catalog` - The catalog name +- `polaris:namespace` - The namespace being accessed +- `polaris:table` - The table name +- `polaris:principal` - The authenticated principal +- `polaris:roles` - The activated principal roles +- `polaris:trace_id` - The OpenTelemetry trace ID + +These tags appear in AWS CloudTrail logs, enabling correlation with Polaris audit events. + +**Note**: Enabling session tags requires the IAM role trust policy to allow the `sts:TagSession` +action. This feature may also reduce credential caching effectiveness since credentials become +specific to each table/namespace/role combination. + +### Compute Engine Integration + +For end-to-end trace correlation, compute engines should propagate the W3C Trace Context headers +when making requests to Polaris. The standard headers are: + +- `traceparent`: Contains the trace ID, parent span ID, and trace flags +- `tracestate`: Optional vendor-specific trace information + +#### Apache Spark + +Spark can propagate trace context using the OpenTelemetry Java agent. 
Add the agent to your Spark +submit command: + +```bash +spark-submit \ + --conf "spark.driver.extraJavaOptions=-javaagent:/path/to/opentelemetry-javaagent.jar" \ + --conf "spark.executor.extraJavaOptions=-javaagent:/path/to/opentelemetry-javaagent.jar" \ + -Dotel.service.name=spark-app \ + -Dotel.exporter.otlp.endpoint=http://collector:4317 \ + your-application.jar +``` + +Alternatively, configure the agent via environment variables: + +```bash +export OTEL_SERVICE_NAME=spark-app +export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector:4317 +export JAVA_TOOL_OPTIONS="-javaagent:/path/to/opentelemetry-javaagent.jar" +``` + +#### Trino + +Trino supports OpenTelemetry tracing with the following configuration in `config.properties`: + +```properties +tracing.enabled=true +tracing.exporter.endpoint=http://collector:4317 +``` + +#### Flink + +Flink can be configured with OpenTelemetry using the Java agent: + +```bash +-javaagent:/path/to/opentelemetry-javaagent.jar \ +-Dotel.service.name=flink-job \ +-Dotel.exporter.otlp.endpoint=http://collector:4317 +``` + +### Correlating Audit Events + +With trace correlation enabled, you can join events across systems: + +1. **Polaris Events**: Query the events table for operations with a specific `trace_id` +2. **CloudTrail Logs**: Filter by the `polaris:trace_id` session tag +3. 
**Compute Engine Logs**: Search for the same trace ID in engine logs + +Example queries to find all Polaris events for a trace: + +**PostgreSQL** (using JSON operators): +```sql +SELECT * FROM polaris_schema.events +WHERE additional_properties->>'otel.trace_id' = '' + OR additional_properties->>'report.trace-id' = '' +ORDER BY timestamp_ms; +``` + +**H2/Generic SQL** (using LIKE pattern matching): +```sql +SELECT * FROM polaris_schema.events +WHERE additional_properties LIKE '%%' +ORDER BY timestamp_ms; +``` + +### Metrics Event Data + +The `AfterReportMetricsEvent` captures the following data in `additional_properties`: + +**For ScanReports:** +- `report_type`: "scan" +- `snapshot_id`: The snapshot ID being scanned +- `schema_id`: The schema ID +- `result_data_files`: Number of data files in the scan result +- `result_delete_files`: Number of delete files in the scan result +- `total_file_size_bytes`: Total size of files scanned +- `scanned_data_manifests`: Number of data manifests scanned +- `skipped_data_manifests`: Number of data manifests skipped +- `report.*`: Any metadata from the report's metadata map (e.g., `report.trace-id`) + +**For CommitReports:** +- `report_type`: "commit" +- `snapshot_id`: The new snapshot ID +- `sequence_number`: The sequence number +- `operation`: The operation type (e.g., "append", "overwrite") +- `added_data_files`: Number of data files added +- `removed_data_files`: Number of data files removed +- `added_records`: Number of records added +- `removed_records`: Number of records removed +- `added_file_size_bytes`: Total size of files added +- `removed_file_size_bytes`: Total size of files removed +- `report.*`: Any metadata from the report's metadata map (e.g., `report.trace-id`) + ## Links Visit [Using Polaris with telemetry tools]({{% relref "getting-started/using-polaris/telemetry-tools" %}}) to see sample Polaris config with Prometheus and Jaeger. 
From 3507365771cdcf77bb1b25a7f8bab1c7df2cdd01 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 19:48:27 -0800 Subject: [PATCH 20/67] test(integration): Update InMemoryBufferEventListenerIntegrationTest for metrics events Update integration tests to include metrics event reporting scenarios: - Add imports for ScanReport, ScanMetrics, ReportMetricsRequest - Add BeforeEach hook to reset database state for test isolation - Add test for BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events - Add ALLOW_OVERLAPPING_CATALOG_URLS feature flag for test setup This ensures the metrics event emission is properly tested in the integration test suite. --- ...oryBufferEventListenerIntegrationTest.java | 379 +++++++++++++++++- 1 file changed, 378 insertions(+), 1 deletion(-) diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java index f8383f1c43..ebbbe618dc 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java @@ -49,8 +49,14 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SessionCatalog; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.metrics.ImmutableScanReport; +import org.apache.iceberg.metrics.ScanMetrics; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; import org.apache.iceberg.rest.RESTSessionCatalog; import org.apache.iceberg.rest.auth.OAuth2Properties; +import org.apache.iceberg.rest.requests.ReportMetricsRequest; import 
org.apache.iceberg.types.Types; import org.apache.polaris.core.admin.model.Catalog; import org.apache.polaris.core.admin.model.CatalogProperties; @@ -65,6 +71,7 @@ import org.apache.polaris.service.it.env.RestApi; import org.apache.polaris.service.it.ext.PolarisIntegrationTestExtension; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.extension.ExtendWith; @@ -92,6 +99,7 @@ public Map getConfigOverrides() { .put("polaris.event-listener.persistence-in-memory-buffer.buffer-time", "100ms") .put("polaris.features.\"ALLOW_INSECURE_STORAGE_TYPES\"", "true") .put("polaris.features.\"SUPPORTED_CATALOG_STORAGE_TYPES\"", "[\"FILE\",\"S3\"]") + .put("polaris.features.\"ALLOW_OVERLAPPING_CATALOG_URLS\"", "true") .put("polaris.readiness.ignore-severe-issues", "true") .build(); } @@ -117,10 +125,32 @@ public void setup( baseLocation = IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve(realm + "/"); } + /** + * Reset the database state before each test to ensure test isolation. The H2 in-memory database + * with DB_CLOSE_DELAY=-1 persists state across tests, so we need to clean up catalog-related + * entities while preserving the realm and principal entities set up in @BeforeAll. 
+ */ + @BeforeEach + public void resetDatabaseState() { + if (dataSource.isResolvable()) { + try (Connection conn = dataSource.get().getConnection(); + Statement stmt = conn.createStatement()) { + // Set the schema first + stmt.execute("SET SCHEMA POLARIS_SCHEMA"); + // Only delete events - catalogs use unique names and locations so they don't conflict + stmt.execute("DELETE FROM EVENTS"); + } catch (Exception e) { + // Ignore errors - tables may not exist yet on first run + } + } + } + @Test void testCreateCatalogAndTable() throws IOException { String catalogName = client.newEntityName("testCreateCatalogAndTable"); + // Use a unique base location for this catalog to avoid overlap with other catalogs + URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); Catalog catalog = PolarisCatalog.builder() @@ -130,7 +160,7 @@ void testCreateCatalogAndTable() throws IOException { .setStorageConfigInfo( FileStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) - .setAllowedLocations(List.of(baseLocation.toString())) + .setAllowedLocations(List.of(catalogBaseLocation.toString())) .build()) .build(); @@ -221,4 +251,351 @@ void testCreateCatalogAndTable() throws IOException { .hasEntrySatisfying("otel.trace_id", value -> assertThat(value).matches("[0-9a-f]{32}")) .hasEntrySatisfying("otel.span_id", value -> assertThat(value).matches("[0-9a-f]{16}")); } + + /** + * Tests that reportMetrics events are emitted with proper trace context for correlation. This + * verifies that compute engine metrics reports can be correlated with other catalog operations + * via the OpenTelemetry trace_id. 
+ */ + @Test + void testReportMetricsEventWithTraceContext() throws IOException { + String catalogName = client.newEntityName("testReportMetrics"); + // Use a unique base location for this catalog to avoid overlap with other catalogs + URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); + + Catalog catalog = + PolarisCatalog.builder() + .setName(catalogName) + .setType(Catalog.TypeEnum.INTERNAL) + .setProperties(CatalogProperties.builder("file:///tmp/").build()) + .setStorageConfigInfo( + FileStorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) + .setAllowedLocations(List.of(catalogBaseLocation.toString())) + .build()) + .build(); + + try (Response response = + managementApi + .request("v1/catalogs") + .header("X-Request-ID", "metrics-test") + .post(Entity.json(catalog))) { + assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); + } + + // Create a table first + try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { + sessionCatalog.initialize( + "polaris_catalog_metrics_test", + ImmutableMap.builder() + .put("uri", endpoints.catalogApiEndpoint().toString()) + .put(OAuth2Properties.TOKEN, authToken) + .put("warehouse", catalogName) + .putAll(endpoints.extraHeaders("header.")) + .build()); + + SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); + Namespace ns = Namespace.of("metrics_ns"); + sessionCatalog.createNamespace(sessionContext, ns); + + sessionCatalog + .buildTable( + sessionContext, + TableIdentifier.of(ns, "metrics_table"), + new Schema(List.of(Types.NestedField.required(1, "id", Types.IntegerType.get())))) + .withSortOrder(SortOrder.unsorted()) + .withPartitionSpec(PartitionSpec.unpartitioned()) + .create(); + } + + // Now send a metrics report via the REST API + // Build a minimal ScanReport for testing + ScanReport scanReport = + ImmutableScanReport.builder() + .schemaId(0) + .tableName("metrics_ns.metrics_table") 
+ .snapshotId(-1L) + .addProjectedFieldIds(1) + .addProjectedFieldNames("id") + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + .build(); + + ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(scanReport); + + RestApi catalogApi = client.catalogApi(authToken); + try (Response response = + catalogApi + .request("v1/" + catalogName + "/namespaces/metrics_ns/tables/metrics_table/metrics") + .header("X-Request-ID", "metrics-report-123") + .post(Entity.json(metricsRequest))) { + assertThat(response).returns(204, Response::getStatus); + } + + // Query for the AFTER_REPORT_METRICS event + String query = + "SELECT * FROM polaris_schema.events WHERE realm_id = '" + + realm + + "' AND event_type = 'AFTER_REPORT_METRICS' ORDER BY timestamp_ms DESC"; + + List metricsEvents = + await() + .atMost(Duration.ofSeconds(10)) + .until( + () -> { + ImmutableList.Builder e = ImmutableList.builder(); + try (Connection connection = dataSource.get().getConnection(); + Statement statement = connection.createStatement(); + ResultSet rs = statement.executeQuery(query)) { + while (rs.next()) { + PolarisEvent event = CONVERTER.fromResultSet(rs); + e.add(event); + } + } + return e.build(); + }, + e -> !e.isEmpty()); + + PolarisEvent metricsEvent = metricsEvents.getFirst(); + assertThat(metricsEvent.getCatalogId()).isEqualTo(catalogName); + assertThat(metricsEvent.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); + assertThat(metricsEvent.getResourceIdentifier()).isEqualTo("metrics_ns.metrics_table"); + assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); + assertThat(metricsEvent.getPrincipalName()).isEqualTo("root"); + assertThat(metricsEvent.getRequestId()).isEqualTo("metrics-report-123"); + + // Verify OpenTelemetry trace context is present for correlation + assertThat(metricsEvent.getAdditionalPropertiesAsMap()) + .containsEntry("otel.trace_flags", "01") + .containsEntry("otel.sampled", 
"true") + .hasEntrySatisfying("otel.trace_id", value -> assertThat(value).matches("[0-9a-f]{32}")) + .hasEntrySatisfying("otel.span_id", value -> assertThat(value).matches("[0-9a-f]{16}")); + } + + /** + * Tests that ScanReport with trace-id in metadata is properly extracted and stored. This verifies + * that compute engines can pass trace context in the report's metadata map for correlation. + */ + @Test + void testReportMetricsWithTraceIdInMetadata() throws IOException { + String catalogName = client.newEntityName("testMetricsTraceId"); + // Use a unique base location for this catalog to avoid overlap with other catalogs + URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); + + Catalog catalog = + PolarisCatalog.builder() + .setName(catalogName) + .setType(Catalog.TypeEnum.INTERNAL) + .setProperties(CatalogProperties.builder("file:///tmp/").build()) + .setStorageConfigInfo( + FileStorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) + .setAllowedLocations(List.of(catalogBaseLocation.toString())) + .build()) + .build(); + + try (Response response = managementApi.request("v1/catalogs").post(Entity.json(catalog))) { + assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); + } + + // Create a table first + try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { + sessionCatalog.initialize( + "polaris_catalog_trace_test", + ImmutableMap.builder() + .put("uri", endpoints.catalogApiEndpoint().toString()) + .put(OAuth2Properties.TOKEN, authToken) + .put("warehouse", catalogName) + .putAll(endpoints.extraHeaders("header.")) + .build()); + + SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); + Namespace ns = Namespace.of("trace_ns"); + sessionCatalog.createNamespace(sessionContext, ns); + + sessionCatalog + .buildTable( + sessionContext, + TableIdentifier.of(ns, "trace_table"), + new Schema(List.of(Types.NestedField.required(1, "id", 
Types.IntegerType.get())))) + .withSortOrder(SortOrder.unsorted()) + .withPartitionSpec(PartitionSpec.unpartitioned()) + .create(); + } + + // Build a ScanReport with trace-id in metadata (as compute engines would do) + String clientTraceId = "abcdef1234567890abcdef1234567890"; + ScanReport scanReport = + ImmutableScanReport.builder() + .schemaId(0) + .tableName("trace_ns.trace_table") + .snapshotId(123L) + .addProjectedFieldIds(1) + .addProjectedFieldNames("id") + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + .putMetadata("trace-id", clientTraceId) + .putMetadata("client-app", "spark-test") + .build(); + + ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(scanReport); + + RestApi catalogApi = client.catalogApi(authToken); + try (Response response = + catalogApi + .request("v1/" + catalogName + "/namespaces/trace_ns/tables/trace_table/metrics") + .header("X-Request-ID", "trace-test-456") + .post(Entity.json(metricsRequest))) { + assertThat(response).returns(204, Response::getStatus); + } + + // Query for the AFTER_REPORT_METRICS event + String query = + "SELECT * FROM polaris_schema.events WHERE realm_id = '" + + realm + + "' AND event_type = 'AFTER_REPORT_METRICS' AND request_id = 'trace-test-456'"; + + List metricsEvents = + await() + .atMost(Duration.ofSeconds(10)) + .until( + () -> { + ImmutableList.Builder e = ImmutableList.builder(); + try (Connection connection = dataSource.get().getConnection(); + Statement statement = connection.createStatement(); + ResultSet rs = statement.executeQuery(query)) { + while (rs.next()) { + PolarisEvent event = CONVERTER.fromResultSet(rs); + e.add(event); + } + } + return e.build(); + }, + e -> !e.isEmpty()); + + PolarisEvent metricsEvent = metricsEvents.getFirst(); + assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); + + // Verify trace-id from report metadata is extracted with "report." 
prefix + Map additionalProps = metricsEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps) + .containsEntry("report.trace-id", clientTraceId) + .containsEntry("report.client-app", "spark-test") + .containsEntry("report_type", "scan") + .containsEntry("snapshot_id", "123") + .containsEntry("schema_id", "0"); + } + + /** + * Tests that CommitReport metrics are properly extracted and stored. This verifies the commit + * metrics path including operation type, sequence number, and commit metrics. + */ + @Test + void testReportCommitMetrics() throws IOException { + String catalogName = client.newEntityName("testCommitMetrics"); + // Use a unique base location for this catalog to avoid overlap with other catalogs + URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); + + Catalog catalog = + PolarisCatalog.builder() + .setName(catalogName) + .setType(Catalog.TypeEnum.INTERNAL) + .setProperties(CatalogProperties.builder("file:///tmp/").build()) + .setStorageConfigInfo( + FileStorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) + .setAllowedLocations(List.of(catalogBaseLocation.toString())) + .build()) + .build(); + + try (Response response = managementApi.request("v1/catalogs").post(Entity.json(catalog))) { + assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); + } + + // Create a table first + try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { + sessionCatalog.initialize( + "polaris_catalog_commit_test", + ImmutableMap.builder() + .put("uri", endpoints.catalogApiEndpoint().toString()) + .put(OAuth2Properties.TOKEN, authToken) + .put("warehouse", catalogName) + .putAll(endpoints.extraHeaders("header.")) + .build()); + + SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); + Namespace ns = Namespace.of("commit_ns"); + sessionCatalog.createNamespace(sessionContext, ns); + + sessionCatalog + .buildTable( + sessionContext, 
+ TableIdentifier.of(ns, "commit_table"), + new Schema(List.of(Types.NestedField.required(1, "id", Types.IntegerType.get())))) + .withSortOrder(SortOrder.unsorted()) + .withPartitionSpec(PartitionSpec.unpartitioned()) + .create(); + } + + // Build a CommitReport + org.apache.iceberg.metrics.CommitReport commitReport = + org.apache.iceberg.metrics.ImmutableCommitReport.builder() + .tableName("commit_ns.commit_table") + .snapshotId(456L) + .sequenceNumber(1L) + .operation("append") + .commitMetrics( + org.apache.iceberg.metrics.CommitMetricsResult.from( + org.apache.iceberg.metrics.CommitMetrics.noop(), ImmutableMap.of())) + .putMetadata("trace-id", "commit-trace-123") + .build(); + + ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(commitReport); + + RestApi catalogApi = client.catalogApi(authToken); + try (Response response = + catalogApi + .request("v1/" + catalogName + "/namespaces/commit_ns/tables/commit_table/metrics") + .header("X-Request-ID", "commit-test-789") + .post(Entity.json(metricsRequest))) { + assertThat(response).returns(204, Response::getStatus); + } + + // Query for the AFTER_REPORT_METRICS event + String query = + "SELECT * FROM polaris_schema.events WHERE realm_id = '" + + realm + + "' AND event_type = 'AFTER_REPORT_METRICS' AND request_id = 'commit-test-789'"; + + List metricsEvents = + await() + .atMost(Duration.ofSeconds(10)) + .until( + () -> { + ImmutableList.Builder e = ImmutableList.builder(); + try (Connection connection = dataSource.get().getConnection(); + Statement statement = connection.createStatement(); + ResultSet rs = statement.executeQuery(query)) { + while (rs.next()) { + PolarisEvent event = CONVERTER.fromResultSet(rs); + e.add(event); + } + } + return e.build(); + }, + e -> !e.isEmpty()); + + PolarisEvent metricsEvent = metricsEvents.getFirst(); + assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); + assertThat(metricsEvent.getResourceIdentifier()).isEqualTo("commit_ns.commit_table"); + + 
// Verify commit report data is extracted + Map additionalProps = metricsEvent.getAdditionalPropertiesAsMap(); + assertThat(additionalProps) + .containsEntry("report_type", "commit") + .containsEntry("snapshot_id", "456") + .containsEntry("sequence_number", "1") + .containsEntry("operation", "append") + .containsEntry("report.trace-id", "commit-trace-123"); + } } From f73903a8f4bbd2e279f1ceb80179d55a13b5fec7 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 16 Jan 2026 21:39:43 -0800 Subject: [PATCH 21/67] Fix failing InMemoryBufferEventListenerIntegrationTest by enabling ENABLE_METRICS_EVENT_EMISSION feature flag The tests testReportCommitMetrics, testReportMetricsWithTraceIdInMetadata, and testReportMetricsEventWithTraceContext were timing out because they were waiting for AFTER_REPORT_METRICS events that were never being emitted. The ENABLE_METRICS_EVENT_EMISSION feature flag defaults to false, which causes the reportMetrics() method to skip event emission. This commit adds the flag to the test profile configuration to enable event emission during tests. 
--- .../inmemory/InMemoryBufferEventListenerIntegrationTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java index ebbbe618dc..05f6a1d54f 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java @@ -100,6 +100,7 @@ public Map getConfigOverrides() { .put("polaris.features.\"ALLOW_INSECURE_STORAGE_TYPES\"", "true") .put("polaris.features.\"SUPPORTED_CATALOG_STORAGE_TYPES\"", "[\"FILE\",\"S3\"]") .put("polaris.features.\"ALLOW_OVERLAPPING_CATALOG_URLS\"", "true") + .put("polaris.features.\"ENABLE_METRICS_EVENT_EMISSION\"", "true") .put("polaris.readiness.ignore-severe-issues", "true") .build(); } From 5785fadcffb5ff23bd67fbe8ac3e9d0403d53357 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 09:39:37 -0800 Subject: [PATCH 22/67] feat: Add backward-compatible metrics processing system This commit implements a new metrics processing architecture while maintaining 100% backward compatibility with the main branch. 
Changes: - Restore main branch files (PolarisMetricsReporter, DefaultMetricsReporter, MetricsReportingConfiguration, DefaultMetricsReporterTest) - Add new MetricsProcessor interface with rich context - Add MetricsProcessingContext with realm ID, principal, request ID, OTel trace - Add MetricsProcessorConfiguration for type-safe configuration - Add built-in processors: NoopMetricsProcessor, LoggingMetricsProcessor, PersistenceMetricsProcessor - Add LegacyMetricsProcessor for backward compatibility with PolarisMetricsReporter - Add MetricsReporterToProcessorAdapter utility - Update ServiceProducers with dual configuration support (old and new paths) - Update IcebergCatalogAdapter to use MetricsProcessor - Update MetricsReportCleanupService to use new configuration - Delete feature branch files (CompositeMetricsReporter, EventsMetricsReporter, PersistingMetricsReporter and their tests) Backward compatibility: - Old configuration path (polaris.iceberg-metrics.reporting.type) continues to work - PolarisMetricsReporter interface unchanged and functional - Automatic fallback to LegacyMetricsProcessor when old config is used - Zero breaking changes for main branch users New features: - Rich context with realm ID, principal, request ID, OpenTelemetry trace - CDI-based extensibility via @Identifier annotations - Type-safe configuration - Direct processing (no events) - Built-in processors for common use cases --- .gitignore | 1 + .../core/config/FeatureConfiguration.java | 19 - .../src/main/resources/application.properties | 5 +- .../iceberg/IcebergCatalogAdapter.java | 12 +- ...ebergRestCatalogEventServiceDelegator.java | 38 +- .../service/config/ServiceProducers.java | 96 ++--- .../PolarisPersistenceEventListener.java | 60 --- .../reporting/CompositeMetricsReporter.java | 92 ----- .../reporting/DefaultMetricsReporter.java | 1 + .../reporting/EventsMetricsReporter.java | 206 ----------- .../reporting/LegacyMetricsProcessor.java | 78 ++++ 
.../reporting/LoggingMetricsProcessor.java | 81 ++++ .../reporting/MetricsProcessingContext.java | 67 ++++ .../service/reporting/MetricsProcessor.java | 78 ++++ .../MetricsProcessorConfiguration.java | 102 +++++ .../MetricsReportCleanupService.java | 24 +- .../MetricsReporterToProcessorAdapter.java | 60 +++ .../MetricsReportingConfiguration.java | 69 +--- .../reporting/NoopMetricsProcessor.java | 66 ++++ .../PersistenceMetricsProcessor.java | 179 +++++++++ .../reporting/PersistingMetricsReporter.java | 188 ---------- .../iceberg/ReportMetricsEventTest.java | 238 ------------ .../PolarisPersistenceEventListenerTest.java | 212 ----------- ...oryBufferEventListenerIntegrationTest.java | 348 ------------------ .../CompositeMetricsReporterTest.java | 133 ------- .../reporting/DefaultMetricsReporterTest.java | 1 + .../reporting/EventsMetricsReporterTest.java | 156 -------- .../PersistingMetricsReporterTest.java | 164 --------- .../apache/polaris/service/TestServices.java | 9 +- 29 files changed, 792 insertions(+), 1991 deletions(-) delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java create mode 100644 
runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java delete mode 100644 runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java delete mode 100644 runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java delete mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java delete mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java delete mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java diff --git a/.gitignore b/.gitignore index 2920730cd3..72771fae96 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,4 @@ venv # to override default properties for local development. # And then use `./gradlew run -Dquarkus.profile=local` to run Polaris with dev profile. 
application-local.properties +.polaris-work-notes.md diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java index 90d5e16afa..d9fe3d5641 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java @@ -540,23 +540,4 @@ public static void enforceFeatureEnabledOrThrow( .description("Metadata batch size for tasks that clean up dropped tables' files.") .defaultValue(10) .buildFeatureConfiguration(); - - /** - * Feature flag to control the emission of BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events - * when the Iceberg REST catalog API's reportMetrics() method is called. When disabled (default), - * the reportMetrics() method calls the delegate directly without emitting any events. When - * enabled, BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events are emitted, allowing event - * listeners to receive metrics report data for use cases like audit logging and metrics - * persistence. - */ - public static final FeatureConfiguration ENABLE_METRICS_EVENT_EMISSION = - PolarisConfiguration.builder() - .key("ENABLE_METRICS_EVENT_EMISSION") - .description( - "If set to true, emit BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events when " - + "the reportMetrics() API is called. This enables event listeners to receive " - + "metrics report data for use cases like audit logging and metrics persistence. 
" - + "Defaults to false to ensure backward compatibility.") - .defaultValue(false) - .buildFeatureConfiguration(); } diff --git a/runtime/defaults/src/main/resources/application.properties b/runtime/defaults/src/main/resources/application.properties index 11dd4449f4..60a658e028 100644 --- a/runtime/defaults/src/main/resources/application.properties +++ b/runtime/defaults/src/main/resources/application.properties @@ -266,8 +266,11 @@ polaris.oidc.principal-roles-mapper.type=default # Polaris Credential Manager Config polaris.credential-manager.type=default -# Configuration for the behaviour of the metrics endpoint +# Configuration for metrics processing +# Legacy configuration (backward compatible with main branch) polaris.iceberg-metrics.reporting.type=default +# New configuration (optional, defaults to noop) +polaris.metrics.processor.type=noop # Set to INFO if you want to see iceberg metric reports logged quarkus.log.category."org.apache.polaris.service.reporting".level=OFF diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java index a5bfe58f27..fb54b5572d 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java @@ -71,7 +71,6 @@ import org.apache.polaris.service.catalog.io.StorageAccessConfigProvider; import org.apache.polaris.service.config.ReservedProperties; import org.apache.polaris.service.context.catalog.CallContextCatalogFactory; -import org.apache.polaris.service.events.EventAttributeMap; import org.apache.polaris.service.http.IcebergHttpUtil; import org.apache.polaris.service.http.IfNoneMatch; import org.apache.polaris.service.reporting.PolarisMetricsReporter; @@ -108,7 +107,6 @@ public class IcebergCatalogAdapter private final 
StorageAccessConfigProvider storageAccessConfigProvider; private final PolarisMetricsReporter metricsReporter; private final Clock clock; - private final EventAttributeMap eventAttributeMap; @Inject public IcebergCatalogAdapter( @@ -127,8 +125,7 @@ public IcebergCatalogAdapter( @Any Instance externalCatalogFactories, StorageAccessConfigProvider storageAccessConfigProvider, PolarisMetricsReporter metricsReporter, - Clock clock, - EventAttributeMap eventAttributeMap) { + Clock clock) { this.diagnostics = diagnostics; this.realmContext = realmContext; this.callContext = callContext; @@ -146,7 +143,6 @@ public IcebergCatalogAdapter( this.storageAccessConfigProvider = storageAccessConfigProvider; this.metricsReporter = metricsReporter; this.clock = clock; - this.eventAttributeMap = eventAttributeMap; } /** @@ -195,8 +191,7 @@ IcebergCatalogHandler newHandlerWrapper(SecurityContext securityContext, String reservedProperties, catalogHandlerUtils, externalCatalogFactories, - storageAccessConfigProvider, - eventAttributeMap); + storageAccessConfigProvider); } @Override @@ -727,9 +722,6 @@ public Response reportMetrics( ReportMetricsRequest reportMetricsRequest, RealmContext realmContext, SecurityContext securityContext) { - // Validate that the caller is authenticated (consistent with other endpoints) - validatePrincipal(securityContext); - String catalogName = prefixParser.prefixToCatalogName(realmContext, prefix); Namespace ns = decodeNamespace(namespace); TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(table)); diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index 25179b8b25..c5f526e756 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ 
b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -826,41 +826,9 @@ public Response reportMetrics( ReportMetricsRequest reportMetricsRequest, RealmContext realmContext, SecurityContext securityContext) { - // Check if metrics event emission is enabled - boolean metricsEventEmissionEnabled = - realmConfig.getConfig(FeatureConfiguration.ENABLE_METRICS_EVENT_EMISSION); - - // If metrics event emission is disabled, call delegate directly without emitting events - if (!metricsEventEmissionEnabled) { - return delegate.reportMetrics( - prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); - } - - // Emit events when feature is enabled - String catalogName = prefixParser.prefixToCatalogName(realmContext, prefix); - Namespace namespaceObj = decodeNamespace(namespace); - polarisEventListener.onEvent( - new PolarisEvent( - PolarisEventType.BEFORE_REPORT_METRICS, - eventMetadataFactory.create(), - new AttributeMap() - .put(EventAttributes.CATALOG_NAME, catalogName) - .put(EventAttributes.NAMESPACE, namespaceObj) - .put(EventAttributes.TABLE_NAME, table) - .put(EventAttributes.REPORT_METRICS_REQUEST, reportMetricsRequest))); - Response resp = - delegate.reportMetrics( - prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); - polarisEventListener.onEvent( - new PolarisEvent( - PolarisEventType.AFTER_REPORT_METRICS, - eventMetadataFactory.create(), - new AttributeMap() - .put(EventAttributes.CATALOG_NAME, catalogName) - .put(EventAttributes.NAMESPACE, namespaceObj) - .put(EventAttributes.TABLE_NAME, table) - .put(EventAttributes.REPORT_METRICS_REQUEST, reportMetricsRequest))); - return resp; + // Metrics processing is now handled directly in IcebergCatalogAdapter + return delegate.reportMetrics( + prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); } @Override diff --git 
a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 3ecfdc8074..8df67c738f 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -81,7 +81,8 @@ import org.apache.polaris.service.ratelimiter.RateLimiterFilterConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketFactory; -import org.apache.polaris.service.reporting.CompositeMetricsReporter; +import org.apache.polaris.service.reporting.MetricsProcessor; +import org.apache.polaris.service.reporting.MetricsProcessorConfiguration; import org.apache.polaris.service.reporting.MetricsReportingConfiguration; import org.apache.polaris.service.reporting.PolarisMetricsReporter; import org.apache.polaris.service.secrets.SecretsManagerConfiguration; @@ -434,61 +435,72 @@ public void closeTaskExecutor(@Disposes @Identifier("task-executor") ManagedExec executor.close(); } + /** + * Produces the legacy {@link PolarisMetricsReporter} for backward compatibility. + * + *

    This producer supports the old configuration path: {@code + * polaris.iceberg-metrics.reporting.type} + * + *

    The reporter is selected based on the configured type using CDI {@link Identifier} + * annotations. + */ @Produces @ApplicationScoped public PolarisMetricsReporter metricsReporter( MetricsReportingConfiguration config, @Any Instance reporters) { String type = config.type(); + LOGGER.info("Initializing legacy metrics reporter: type={}", type); - if ("composite".equals(type)) { - List targets = config.targets(); - if (targets == null || targets.isEmpty()) { - LOGGER.warn( - "Composite metrics reporter configured but no targets specified. " - + "Falling back to default reporter."); - return reporters.select(Identifier.Literal.of("default")).get(); - } - - List delegates = new ArrayList<>(); - for (String target : targets) { - if (target == null || target.isBlank()) { - continue; - } - String trimmedTarget = target.trim(); - // Avoid infinite recursion - don't allow composite as a target - if ("composite".equals(trimmedTarget)) { - LOGGER.warn("Ignoring 'composite' as a target - would cause infinite recursion"); - continue; - } - try { - PolarisMetricsReporter delegate = - reporters.select(Identifier.Literal.of(trimmedTarget)).get(); - delegates.add(delegate); - LOGGER.info("Added metrics reporter target: {}", trimmedTarget); - } catch (Exception e) { - LOGGER.error( - "Failed to instantiate metrics reporter for target '{}': {}", - trimmedTarget, - e.getMessage()); - } - } - - if (delegates.isEmpty()) { - LOGGER.warn("No valid targets for composite reporter. Falling back to default reporter."); - return reporters.select(Identifier.Literal.of("default")).get(); - } + try { + PolarisMetricsReporter reporter = reporters.select(Identifier.Literal.of(type)).get(); + LOGGER.info("Successfully initialized legacy metrics reporter: {}", type); + return reporter; + } catch (Exception e) { + LOGGER.error( + "Failed to instantiate metrics reporter for type '{}': {}. 
Falling back to default.", + type, + e.getMessage()); + return reporters.select(Identifier.Literal.of("default")).get(); + } + } - return new CompositeMetricsReporter(delegates); + /** + * Produces the new {@link MetricsProcessor} for metrics processing. + * + *

    This producer supports the new configuration path: {@code polaris.metrics.processor.type} + * + *

    The processor is selected based on the configured type using CDI {@link Identifier} + * annotations. If the new configuration is not specified, it falls back to using the legacy + * {@link PolarisMetricsReporter} via the "legacy" processor. + */ + @Produces + @ApplicationScoped + public MetricsProcessor metricsProcessor( + MetricsProcessorConfiguration processorConfig, + MetricsReportingConfiguration reporterConfig, + @Any Instance processors) { + String type = processorConfig.type(); + LOGGER.info("Initializing metrics processor: type={}", type); + + // If using default "noop" processor but old config is set, use legacy processor + if ("noop".equals(type) && !"default".equals(reporterConfig.type())) { + LOGGER.info( + "New processor config is 'noop' but legacy reporter config is '{}'. " + + "Using legacy processor for backward compatibility.", + reporterConfig.type()); + type = "legacy"; } try { - return reporters.select(Identifier.Literal.of(type)).get(); + MetricsProcessor processor = processors.select(Identifier.Literal.of(type)).get(); + LOGGER.info("Successfully initialized metrics processor: {}", type); + return processor; } catch (Exception e) { LOGGER.error( - "Failed to instantiate metrics reporter for type '{}': {}. Falling back to default.", + "Failed to instantiate metrics processor for type '{}': {}. 
Falling back to noop.", type, e.getMessage()); - return reporters.select(Identifier.Literal.of("default")).get(); + return processors.select(Identifier.Literal.of("noop")).get(); } } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java b/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java index 4f6105313c..e073e93447 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListener.java @@ -25,10 +25,6 @@ import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.MetricsReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.polaris.core.admin.model.Catalog; import org.apache.polaris.core.auth.PolarisPrincipal; @@ -42,7 +38,6 @@ public void onEvent(PolarisEvent event) { switch (event.type()) { case AFTER_CREATE_TABLE -> handleAfterCreateTable(event); case AFTER_CREATE_CATALOG -> handleAfterCreateCatalog(event); - case AFTER_REPORT_METRICS -> handleAfterReportMetrics(event); default -> { // Other events not handled by this listener } @@ -95,61 +90,6 @@ private void handleAfterCreateCatalog(PolarisEvent event) { processEvent(event.metadata().realmId(), polarisEvent); } - private void handleAfterReportMetrics(PolarisEvent event) { - ReportMetricsRequest request = - event.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST); - String catalogName = event.attributes().getRequired(EventAttributes.CATALOG_NAME); - Namespace namespace = 
event.attributes().getRequired(EventAttributes.NAMESPACE); - String tableName = event.attributes().getRequired(EventAttributes.TABLE_NAME); - - org.apache.polaris.core.entity.PolarisEvent polarisEvent = - new org.apache.polaris.core.entity.PolarisEvent( - catalogName, - event.metadata().eventId().toString(), - event.metadata().requestId().orElse(null), - event.type().name(), - event.metadata().timestamp().toEpochMilli(), - event.metadata().user().map(PolarisPrincipal::getName).orElse(null), - org.apache.polaris.core.entity.PolarisEvent.ResourceType.TABLE, - TableIdentifier.of(namespace, tableName).toString()); - - var additionalParameters = ImmutableMap.builder(); - MetricsReport report = request.report(); - if (report instanceof ScanReport scanReport) { - additionalParameters.put("report_type", "scan"); - additionalParameters.put("snapshot_id", String.valueOf(scanReport.snapshotId())); - additionalParameters.put("schema_id", String.valueOf(scanReport.schemaId())); - Map metadata = scanReport.metadata(); - if (metadata != null) { - metadata.forEach( - (key, value) -> { - if (value != null) { - additionalParameters.put("report." + key, value); - } - }); - } - } else if (report instanceof CommitReport commitReport) { - additionalParameters.put("report_type", "commit"); - additionalParameters.put("snapshot_id", String.valueOf(commitReport.snapshotId())); - additionalParameters.put("sequence_number", String.valueOf(commitReport.sequenceNumber())); - if (commitReport.operation() != null) { - additionalParameters.put("operation", commitReport.operation()); - } - Map metadata = commitReport.metadata(); - if (metadata != null) { - metadata.forEach( - (key, value) -> { - if (value != null) { - additionalParameters.put("report." 
+ key, value); - } - }); - } - } - additionalParameters.putAll(event.metadata().openTelemetryContext()); - polarisEvent.setAdditionalProperties(additionalParameters.build()); - processEvent(event.metadata().realmId(), polarisEvent); - } - protected abstract void processEvent( String realmId, org.apache.polaris.core.entity.PolarisEvent event); } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java deleted file mode 100644 index fbad83128e..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/CompositeMetricsReporter.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import java.util.List; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.MetricsReport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A composite metrics reporter that delegates to multiple child reporters. 
This allows metrics to - * be sent to multiple destinations simultaneously, such as both the events table and dedicated - * metrics tables. - * - *

    To enable this reporter, set the configuration: - * - *

    - * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    - *       type: composite
    - *       targets:
    - *         - events       # Write to events table
    - *         - persistence  # Write to dedicated tables
    - * 
    - * - *

    The composite reporter will call each configured target reporter in order. If one reporter - * fails, the others will still be called. - */ -public class CompositeMetricsReporter implements PolarisMetricsReporter { - - private static final Logger LOGGER = LoggerFactory.getLogger(CompositeMetricsReporter.class); - - private final List delegates; - - /** - * Creates a composite reporter with the given delegate reporters. - * - * @param delegates the list of reporters to delegate to - */ - public CompositeMetricsReporter(List delegates) { - this.delegates = List.copyOf(delegates); - LOGGER.info( - "CompositeMetricsReporter initialized with {} delegate(s): {}", - delegates.size(), - delegates.stream().map(r -> r.getClass().getSimpleName()).toList()); - } - - @Override - public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { - for (PolarisMetricsReporter delegate : delegates) { - try { - delegate.reportMetric(catalogName, table, metricsReport); - } catch (Exception e) { - LOGGER.error( - "Delegate reporter {} failed for table {}.{}: {}", - delegate.getClass().getSimpleName(), - catalogName, - table, - e.getMessage(), - e); - // Continue with other delegates even if one fails - } - } - } - - /** - * Returns the list of delegate reporters. 
- * - * @return unmodifiable list of delegates - */ - public List getDelegates() { - return delegates; - } -} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java index eaff0219b4..d08e9b0b46 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java @@ -74,3 +74,4 @@ public void reportMetric( reportConsumer.accept(catalogName, table, metricsReport, receivedTimestamp); } } + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java deleted file mode 100644 index 686f2dde82..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/EventsMetricsReporter.java +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.opentelemetry.api.trace.Span; -import io.opentelemetry.api.trace.SpanContext; -import io.quarkus.security.identity.SecurityIdentity; -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.enterprise.inject.Instance; -import jakarta.inject.Inject; -import java.security.Principal; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.MetricsReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.entity.PolarisEvent; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A metrics reporter that persists scan and commit reports to the events table as JSON. This - * provides a unified audit trail where metrics are stored alongside other catalog events. - * - *

    To enable this reporter, set the configuration: - * - *

    - * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    - *       type: events
    - * 
    - * - *

    Or use it as part of a composite reporter: - * - *

    - * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    - *       type: composite
    - *       targets:
    - *         - events
    - *         - persistence
    - * 
    - */ -@ApplicationScoped -@Identifier("events") -public class EventsMetricsReporter implements PolarisMetricsReporter { - - private static final Logger LOGGER = LoggerFactory.getLogger(EventsMetricsReporter.class); - - public static final String EVENT_TYPE_SCAN_REPORT = "ScanReport"; - public static final String EVENT_TYPE_COMMIT_REPORT = "CommitReport"; - - private final MetaStoreManagerFactory metaStoreManagerFactory; - private final RealmContext realmContext; - private final ObjectMapper objectMapper; - private final Instance securityIdentityInstance; - - @Inject - public EventsMetricsReporter( - MetaStoreManagerFactory metaStoreManagerFactory, - RealmContext realmContext, - ObjectMapper objectMapper, - Instance securityIdentityInstance) { - this.metaStoreManagerFactory = metaStoreManagerFactory; - this.realmContext = realmContext; - this.objectMapper = objectMapper; - this.securityIdentityInstance = securityIdentityInstance; - } - - @Override - public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { - try { - String eventType; - PolarisEvent.ResourceType resourceType = PolarisEvent.ResourceType.TABLE; - String resourceIdentifier = table.toString(); - - if (metricsReport instanceof ScanReport) { - eventType = EVENT_TYPE_SCAN_REPORT; - } else if (metricsReport instanceof CommitReport) { - eventType = EVENT_TYPE_COMMIT_REPORT; - } else { - LOGGER.warn("Unknown metrics report type: {}", metricsReport.getClass().getName()); - return; - } - - // Extract principal name from security context - String principalName = extractPrincipalName(); - - // Extract OpenTelemetry trace context - String otelTraceId = null; - String otelSpanId = null; - Span currentSpan = Span.current(); - if (currentSpan != null) { - SpanContext spanContext = currentSpan.getSpanContext(); - if (spanContext != null && spanContext.isValid()) { - otelTraceId = spanContext.getTraceId(); - otelSpanId = spanContext.getSpanId(); - } - } - - // Serialize the 
metrics report and add trace context to additional properties - Map additionalProps = new HashMap<>(); - additionalProps.put("metricsReport", serializeMetricsReportToMap(metricsReport)); - if (otelTraceId != null) { - additionalProps.put("otelTraceId", otelTraceId); - } - if (otelSpanId != null) { - additionalProps.put("otelSpanId", otelSpanId); - } - String additionalPropsJson = serializeToJson(additionalProps); - - PolarisEvent event = - new PolarisEvent( - catalogName, - UUID.randomUUID().toString(), - null, // requestId - could be extracted from context if available - eventType, - System.currentTimeMillis(), - principalName, - resourceType, - resourceIdentifier); - event.setAdditionalProperties(additionalPropsJson); - - // Get the persistence session for the current realm and write the event - BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); - session.writeEvents(List.of(event)); - - LOGGER.debug("Persisted {} event for table {}.{}", eventType, catalogName, table); - } catch (Exception e) { - LOGGER.error( - "Failed to persist metrics event for table {}.{}: {}", - catalogName, - table, - e.getMessage(), - e); - } - } - - /** - * Extracts the principal name from the current security context. 
- * - * @return the principal name, or null if not available - */ - private String extractPrincipalName() { - try { - if (securityIdentityInstance.isResolvable()) { - SecurityIdentity identity = securityIdentityInstance.get(); - if (identity != null && !identity.isAnonymous()) { - Principal principal = identity.getPrincipal(); - if (principal != null) { - return principal.getName(); - } - } - } - } catch (Exception e) { - LOGGER.trace("Could not extract principal name from security context: {}", e.getMessage()); - } - return null; - } - - private Object serializeMetricsReportToMap(MetricsReport metricsReport) { - try { - String json = objectMapper.writeValueAsString(metricsReport); - return objectMapper.readValue(json, Object.class); - } catch (JsonProcessingException e) { - LOGGER.warn("Failed to serialize metrics report: {}", e.getMessage()); - return Map.of(); - } - } - - private String serializeToJson(Object obj) { - try { - return objectMapper.writeValueAsString(obj); - } catch (JsonProcessingException e) { - LOGGER.warn("Failed to serialize to JSON: {}", e.getMessage()); - return "{}"; - } - } -} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java new file mode 100644 index 0000000000..3e97aedea8 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link MetricsProcessor} that delegates to the legacy {@link PolarisMetricsReporter} for + * backward compatibility. + * + *

    This processor is used when the old configuration path {@code + * polaris.iceberg-metrics.reporting.type} is specified. It wraps the configured {@link + * PolarisMetricsReporter} and adapts it to the new {@link MetricsProcessor} interface. + * + *

    This allows existing configurations to continue working without changes during the migration + * to the new metrics processing system. + * + *

    To use this processor with the new configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: legacy
    + * 
    + * + *

    Or continue using the old configuration (automatically mapped to this processor): + * + *

    + * polaris:
    + *   iceberg-metrics:
    + *     reporting:
    + *       type: default
    + * 
    + */ +@ApplicationScoped +@Identifier("legacy") +public class LegacyMetricsProcessor implements MetricsProcessor { + + private static final Logger LOGGER = LoggerFactory.getLogger(LegacyMetricsProcessor.class); + + private final PolarisMetricsReporter reporter; + + @Inject + public LegacyMetricsProcessor(PolarisMetricsReporter reporter) { + this.reporter = reporter; + LOGGER.info( + "LegacyMetricsProcessor initialized with reporter: {}", reporter.getClass().getName()); + } + + @Override + public void process(MetricsProcessingContext context) { + // Delegate to the legacy reporter with basic parameters + reporter.reportMetric( + context.catalogName(), context.tableIdentifier(), context.metricsReport()); + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java new file mode 100644 index 0000000000..584ef921ca --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.reporting; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link MetricsProcessor} implementation that logs metrics to the console. + * + *

    This processor logs all metrics reports at INFO level, including the full context + * information such as realm ID, principal, request ID, and OpenTelemetry trace context. + * + *

    This processor is useful for: + * + *

      + *
    • Development and debugging + *
    • Understanding what metrics are being reported + *
    • Troubleshooting metrics processing issues + *
    + * + *

    Configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: logging
    + * 
    + * + *

    To see the logs, ensure the logging level is set appropriately: + * + *

    + * quarkus.log.category."org.apache.polaris.service.reporting".level=INFO
    + * 
    + */ +@ApplicationScoped +@Identifier("logging") +public class LoggingMetricsProcessor implements MetricsProcessor { + + private static final Logger LOGGER = LoggerFactory.getLogger(LoggingMetricsProcessor.class); + + public LoggingMetricsProcessor() { + LOGGER.info("LoggingMetricsProcessor initialized - metrics will be logged to console"); + } + + @Override + public void process(MetricsProcessingContext context) { + LOGGER.info( + "Metrics Report: catalog={}, table={}, realm={}, principal={}, requestId={}, " + + "traceId={}, spanId={}, timestamp={}, report={}", + context.catalogName(), + context.tableIdentifier(), + context.realmId(), + context.principalName().orElse("unknown"), + context.requestId().orElse("none"), + context.otelTraceId().orElse("none"), + context.otelSpanId().orElse("none"), + context.timestampMs(), + context.metricsReport()); + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java new file mode 100644 index 0000000000..629d43362f --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import java.util.Optional; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.MetricsReport; +import org.immutables.value.Value; + +/** + * Context information for metrics processing, providing access to request metadata, security + * context, and tracing information. + * + *

    This immutable context object contains all the information needed to process and persist + * Iceberg metrics reports, including catalog and table identifiers, the metrics report itself, and + * associated metadata like principal name, request ID, and OpenTelemetry trace context. + */ +@Value.Immutable +public interface MetricsProcessingContext { + + /** The catalog name where the metrics originated */ + String catalogName(); + + /** The table identifier */ + TableIdentifier tableIdentifier(); + + /** The Iceberg metrics report (ScanReport or CommitReport) */ + MetricsReport metricsReport(); + + /** The realm ID */ + String realmId(); + + /** The catalog ID (internal entity ID) */ + Optional catalogId(); + + /** The principal name who submitted the metrics */ + Optional principalName(); + + /** The request ID for correlation */ + Optional requestId(); + + /** OpenTelemetry trace ID */ + Optional otelTraceId(); + + /** OpenTelemetry span ID */ + Optional otelSpanId(); + + /** Timestamp when metrics were received (milliseconds since epoch) */ + long timestampMs(); +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java new file mode 100644 index 0000000000..0071df16cc --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +/** + * Interface for processing Iceberg metrics reports in Polaris. + * + *

    This interface provides a pluggable mechanism for handling metrics reports from Iceberg table + * operations. Implementations can persist metrics to various backends, forward them to external + * systems, or perform custom processing. + * + *

    Processors are discovered via CDI using the {@link io.smallrye.common.annotation.Identifier} + * annotation. Custom processors can be implemented and registered by annotating them with {@code + * @ApplicationScoped} and {@code @Identifier("custom-name")}. + * + *

    Available built-in processors: + * + *

      + *
    • {@code noop} - Discards all metrics (default) + *
    • {@code logging} - Logs metrics to console for debugging + *
    • {@code persistence} - Persists to dedicated metrics tables + *
    + * + *

    Example configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: persistence
    + * 
    + * + *

    Custom implementations should be annotated with: + * + *

    + * {@literal @}ApplicationScoped
    + * {@literal @}Identifier("custom-processor")
    + * public class CustomMetricsProcessor implements MetricsProcessor {
    + *   {@literal @}Override
    + *   public void process(MetricsProcessingContext context) {
    + *     // implementation
    + *   }
    + * }
    + * 
    + * + * @see MetricsProcessingContext + * @see MetricsProcessorConfiguration + */ +public interface MetricsProcessor { + + /** + * Process a metrics report with full context information. + * + *

    Implementations should handle exceptions gracefully and not throw exceptions that would + * disrupt the metrics reporting flow. Errors should be logged and metrics about processing + * failures should be emitted. + * + * @param context the complete context for metrics processing + */ + void process(MetricsProcessingContext context); +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java new file mode 100644 index 0000000000..ba55985058 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.time.Duration; +import java.util.Optional; + +/** + * Configuration for metrics processing in Polaris. + * + *

    This configuration controls how Iceberg metrics reports are processed and persisted. The + * processor type determines which implementation is used. + * + *

    Example configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: persistence
    + *       retention:
    + *         enabled: true
    + *         retention-period: P30D
    + *         cleanup-interval: PT6H
    + * 
    + */ +@ConfigMapping(prefix = "polaris.metrics.processor") +public interface MetricsProcessorConfiguration { + + /** + * The type of metrics processor to use. + * + *

    Supported built-in values: + * + *

      + *
    • {@code noop} - No processing, discards all metrics (default) + *
    • {@code logging} - Log metrics to console for debugging + *
    • {@code persistence} - Persist to dedicated metrics tables + *
    + * + *

    Custom processor types can be specified if a corresponding {@link MetricsProcessor} + * implementation is available with a matching {@link io.smallrye.common.annotation.Identifier}. + * + * @return the processor type identifier + */ + @WithDefault("noop") + String type(); + + /** + * Retention policy configuration for persisted metrics reports. + * + * @return the retention configuration + */ + Optional retention(); + + /** Retention policy configuration for metrics reports. */ + interface Retention { + + /** + * Whether automatic cleanup of old metrics reports is enabled. + * + * @return true if cleanup is enabled + */ + @WithDefault("false") + boolean enabled(); + + /** + * How long to retain metrics reports before they are eligible for deletion. + * + * @return the retention period (default: 30 days) + */ + @WithDefault("P30D") + Duration retentionPeriod(); + + /** + * How often to run the cleanup job. + * + * @return the cleanup interval (default: 6 hours) + */ + @WithDefault("PT6H") + Duration cleanupInterval(); + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java index 04bbca6365..8ddbcbdc1f 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java @@ -44,8 +44,8 @@ * *

      * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    + *   metrics:
    + *     processor:
      *       type: persistence
      *       retention:
      *         enabled: true
    @@ -59,25 +59,25 @@ public class MetricsReportCleanupService {
     
       private static final Logger LOGGER = LoggerFactory.getLogger(MetricsReportCleanupService.class);
     
    -  private final MetricsReportingConfiguration config;
    +  private final MetricsProcessorConfiguration config;
       private final MetaStoreManagerFactory metaStoreManagerFactory;
       private final RealmContextConfiguration realmContextConfiguration;
       private final AtomicBoolean running = new AtomicBoolean(false);
     
       @Inject
       public MetricsReportCleanupService(
    -      MetricsReportingConfiguration config,
    +      MetricsProcessorConfiguration config,
           MetaStoreManagerFactory metaStoreManagerFactory,
           RealmContextConfiguration realmContextConfiguration) {
         this.config = config;
         this.metaStoreManagerFactory = metaStoreManagerFactory;
         this.realmContextConfiguration = realmContextConfiguration;
     
    -    if (config.retention().enabled()) {
    +    if (config.retention().isPresent() && config.retention().get().enabled()) {
           LOGGER.info(
               "Metrics report cleanup enabled with retention period: {}, cleanup interval: {}",
    -          config.retention().retentionPeriod(),
    -          config.retention().cleanupInterval());
    +          config.retention().get().retentionPeriod(),
    +          config.retention().get().cleanupInterval());
         } else {
           LOGGER.debug("Metrics report cleanup is disabled");
         }
    @@ -87,9 +87,9 @@ public MetricsReportCleanupService(
        * Scheduled cleanup job that runs at the configured interval. The actual interval is configured
        * via the retention.cleanup-interval property.
        */
    -  @Scheduled(every = "${polaris.iceberg-metrics.reporting.retention.cleanup-interval:6h}")
    +  @Scheduled(every = "${polaris.metrics.processor.retention.cleanup-interval:6h}")
       public void cleanupOldMetricsReports() {
    -    if (!config.retention().enabled()) {
    +    if (config.retention().isEmpty() || !config.retention().get().enabled()) {
           LOGGER.trace("Metrics cleanup is disabled, skipping");
           return;
         }
    @@ -108,7 +108,7 @@ public void cleanupOldMetricsReports() {
       }
     
       private void performCleanup() {
    -    Duration retentionPeriod = config.retention().retentionPeriod();
    +    Duration retentionPeriod = config.retention().get().retentionPeriod();
         long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli();
     List<String> realmIds = realmContextConfiguration.realms();
     
    @@ -163,7 +163,7 @@ private int cleanupForRealm(String realmId, long cutoffTimestamp) {
        *     failed
        */
       public int triggerCleanup() {
    -    if (!config.retention().enabled()) {
    +    if (config.retention().isEmpty() || !config.retention().get().enabled()) {
           LOGGER.warn("Cannot trigger cleanup: retention is disabled");
           return -1;
         }
    @@ -174,7 +174,7 @@ public int triggerCleanup() {
         }
     
         try {
    -      Duration retentionPeriod = config.retention().retentionPeriod();
    +      Duration retentionPeriod = config.retention().get().retentionPeriod();
           long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli();
       List<String> realmIds = realmContextConfiguration.realms();
     
    diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java
    new file mode 100644
    index 0000000000..b674dd709c
    --- /dev/null
    +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java
    @@ -0,0 +1,60 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.polaris.service.reporting;
    +
    +import org.apache.iceberg.catalog.TableIdentifier;
    +import org.apache.iceberg.metrics.MetricsReport;
    +
    +/**
    + * Adapter that bridges the legacy {@link PolarisMetricsReporter} interface to the new {@link
    + * MetricsProcessor} interface.
    + *
    + * 

    This adapter allows existing {@link PolarisMetricsReporter} implementations to work with the + * new {@link MetricsProcessor} system, providing backward compatibility during the migration + * period. + * + *

    The adapter converts the simple {@link PolarisMetricsReporter#reportMetric(String, + * TableIdentifier, MetricsReport)} call into a full {@link MetricsProcessingContext} by extracting + * available information from the current request context. + */ +public class MetricsReporterToProcessorAdapter implements MetricsProcessor { + + private final PolarisMetricsReporter reporter; + + public MetricsReporterToProcessorAdapter(PolarisMetricsReporter reporter) { + this.reporter = reporter; + } + + @Override + public void process(MetricsProcessingContext context) { + // Delegate to the legacy reporter interface with just the basic parameters + reporter.reportMetric( + context.catalogName(), context.tableIdentifier(), context.metricsReport()); + } + + /** + * Get the underlying legacy reporter. + * + * @return the wrapped PolarisMetricsReporter + */ + public PolarisMetricsReporter getReporter() { + return reporter; + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java index a041b3170c..6a0846969c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java @@ -20,77 +20,10 @@ import io.smallrye.config.ConfigMapping; import io.smallrye.config.WithDefault; -import java.time.Duration; -import java.util.List; @ConfigMapping(prefix = "polaris.iceberg-metrics.reporting") public interface MetricsReportingConfiguration { - /** - * The type of metrics reporter to use. Supported values: - * - *

      - *
    • {@code default} - Log metrics to console only (no persistence) - *
    • {@code events} - Persist metrics to the events table as JSON - *
    • {@code persistence} - Persist metrics to dedicated tables (scan_metrics_report, - * commit_metrics_report) - *
    • {@code composite} - Use multiple reporters based on the {@link #targets()} configuration - *
    - * - * @return the reporter type - */ @WithDefault("default") String type(); - - /** - * List of reporter targets to use when {@link #type()} is set to {@code composite}. Each target - * corresponds to a reporter type: {@code default}, {@code events}, or {@code persistence}. - * - *

    Example configuration: - * - *

    -   * polaris:
    -   *   iceberg-metrics:
    -   *     reporting:
    -   *       type: composite
    -   *       targets:
    -   *         - events
    -   *         - persistence
    -   * 
    - * - * @return list of reporter targets, empty if not using composite type - */ - default List targets() { - return List.of(); - } - - /** Configuration for metrics retention and cleanup. */ - RetentionConfig retention(); - - interface RetentionConfig { - /** - * Whether automatic cleanup of old metrics reports is enabled. Default is false (disabled). - * - * @return true if cleanup is enabled - */ - @WithDefault("false") - boolean enabled(); - - /** - * How long to retain metrics reports before they are eligible for cleanup. Default is 30 days. - * Supports ISO-8601 duration format (e.g., "P30D" for 30 days, "PT24H" for 24 hours). - * - * @return the retention period - */ - @WithDefault("P30D") - Duration retentionPeriod(); - - /** - * How often to run the cleanup job. Default is every 6 hours. Supports ISO-8601 duration - * format. - * - * @return the cleanup interval - */ - @WithDefault("PT6H") - Duration cleanupInterval(); - } } + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java new file mode 100644 index 0000000000..d73475a28b --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A no-op implementation of {@link MetricsProcessor} that discards all metrics. + * + *

    This is the default processor when no specific type is configured. It performs no processing + * and simply discards all metrics reports. + * + *

    This processor is useful when: + * + *

      + *
    • Metrics processing is not needed + *
    • You want to disable metrics processing temporarily + *
    • You're testing and don't want metrics overhead + *
    + * + *

    Configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: noop
    + * 
    + */ +@ApplicationScoped +@Identifier("noop") +public class NoopMetricsProcessor implements MetricsProcessor { + + private static final Logger LOGGER = LoggerFactory.getLogger(NoopMetricsProcessor.class); + + public NoopMetricsProcessor() { + LOGGER.debug("NoopMetricsProcessor initialized - all metrics will be discarded"); + } + + @Override + public void process(MetricsProcessingContext context) { + // Intentionally do nothing - discard all metrics + LOGGER.trace( + "Discarding metrics for {}.{}", context.catalogName(), context.tableIdentifier()); + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java new file mode 100644 index 0000000000..21c18d5cde --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.reporting; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.persistence.BasePersistence; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; +import org.apache.polaris.persistence.relational.jdbc.models.MetricsReportConverter; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.apache.polaris.service.context.RealmContextConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link MetricsProcessor} that persists metrics to dedicated database tables. + * + *

    This processor stores Iceberg metrics reports in dedicated tables: + * + *

      + *
    • {@code scan_metrics_report} - For ScanReport metrics + *
    • {@code commit_metrics_report} - For CommitReport metrics + *
    + * + *

    The processor includes full context information such as realm ID, catalog ID, principal name, + * request ID, and OpenTelemetry trace context for correlation and analysis. + * + *

    Requirements: + * + *

      + *
    • Requires JDBC-based persistence backend ({@code polaris.persistence.type=relational-jdbc}) + *
    • Database schema must include metrics tables (created via Flyway migrations) + *
    + * + *

    Configuration: + * + *

    + * polaris:
    + *   metrics:
    + *     processor:
    + *       type: persistence
    + *       retention:
    + *         enabled: true
    + *         retention-period: P30D
    + *         cleanup-interval: PT6H
    + * 
    + */ +@ApplicationScoped +@Identifier("persistence") +public class PersistenceMetricsProcessor implements MetricsProcessor { + + private static final Logger LOGGER = LoggerFactory.getLogger(PersistenceMetricsProcessor.class); + + private final MetaStoreManagerFactory metaStoreManagerFactory; + private final RealmContextConfiguration realmContextConfiguration; + + @Inject + public PersistenceMetricsProcessor( + MetaStoreManagerFactory metaStoreManagerFactory, + RealmContextConfiguration realmContextConfiguration) { + this.metaStoreManagerFactory = metaStoreManagerFactory; + this.realmContextConfiguration = realmContextConfiguration; + LOGGER.info("PersistenceMetricsProcessor initialized - metrics will be persisted to database"); + } + + @Override + public void process(MetricsProcessingContext context) { + try { + // Get the persistence session for the realm + String realmId = context.realmId(); + RealmContext realmContext = () -> realmId; + BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); + + // Only JDBC persistence supports metrics tables + if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { + LOGGER.warn( + "Persistence metrics processor requires JDBC persistence backend. " + + "Current backend: {}. Metrics will not be persisted.", + session.getClass().getSimpleName()); + return; + } + + // Persist based on report type + if (context.metricsReport() instanceof ScanReport scanReport) { + persistScanReport(jdbcPersistence, context, scanReport); + } else if (context.metricsReport() instanceof CommitReport commitReport) { + persistCommitReport(jdbcPersistence, context, commitReport); + } else { + LOGGER.warn( + "Unknown metrics report type: {}. 
Metrics will not be persisted.", + context.metricsReport().getClass().getName()); + } + } catch (Exception e) { + LOGGER.error( + "Failed to persist metrics for {}.{}: {}", + context.catalogName(), + context.tableIdentifier(), + e.getMessage(), + e); + } + } + + private void persistScanReport( + JdbcBasePersistenceImpl jdbcPersistence, + MetricsProcessingContext context, + ScanReport scanReport) { + try { + String namespace = context.tableIdentifier().namespace().toString(); + String catalogId = context.catalogId().map(String::valueOf).orElse(null); + + ModelScanMetricsReport modelReport = + MetricsReportConverter.fromScanReport( + scanReport, + context.realmId(), + catalogId, + context.catalogName(), + namespace, + context.principalName().orElse(null), + context.requestId().orElse(null), + context.otelTraceId().orElse(null), + context.otelSpanId().orElse(null)); + + jdbcPersistence.writeScanMetricsReport(modelReport); + LOGGER.debug("Persisted scan metrics for {}.{}", context.catalogName(), context.tableIdentifier()); + } catch (Exception e) { + LOGGER.error("Failed to persist scan metrics: {}", e.getMessage(), e); + } + } + + private void persistCommitReport( + JdbcBasePersistenceImpl jdbcPersistence, + MetricsProcessingContext context, + CommitReport commitReport) { + try { + String namespace = context.tableIdentifier().namespace().toString(); + String catalogId = context.catalogId().map(String::valueOf).orElse(null); + + ModelCommitMetricsReport modelReport = + MetricsReportConverter.fromCommitReport( + commitReport, + context.realmId(), + catalogId, + context.catalogName(), + namespace, + context.principalName().orElse(null), + context.requestId().orElse(null), + context.otelTraceId().orElse(null), + context.otelSpanId().orElse(null)); + + jdbcPersistence.writeCommitMetricsReport(modelReport); + LOGGER.debug("Persisted commit metrics for {}.{}", context.catalogName(), context.tableIdentifier()); + } catch (Exception e) { + LOGGER.error("Failed to persist 
commit metrics: {}", e.getMessage(), e); + } + } +} + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java deleted file mode 100644 index 136a84d045..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import io.opentelemetry.api.trace.Span; -import io.opentelemetry.api.trace.SpanContext; -import io.quarkus.security.identity.SecurityIdentity; -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.enterprise.inject.Instance; -import jakarta.inject.Inject; -import java.security.Principal; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.MetricsReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; -import org.apache.polaris.persistence.relational.jdbc.models.MetricsReportConverter; -import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A metrics reporter that persists scan and commit reports as first-class entities in the database. - * This provides better queryability and analytics capabilities compared to storing metrics as - * generic events. - * - *

    To enable this reporter, set the configuration: - * - *

    - * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    - *       type: persistence
    - * 
    - * - *

    Note: This reporter requires the relational-jdbc persistence backend. If a different - * persistence backend is configured, metrics will be logged but not persisted. - */ -@ApplicationScoped -@Identifier("persistence") -public class PersistingMetricsReporter implements PolarisMetricsReporter { - - private static final Logger LOGGER = LoggerFactory.getLogger(PersistingMetricsReporter.class); - - private final MetaStoreManagerFactory metaStoreManagerFactory; - private final RealmContext realmContext; - private final Instance securityIdentityInstance; - - @Inject - public PersistingMetricsReporter( - MetaStoreManagerFactory metaStoreManagerFactory, - RealmContext realmContext, - Instance securityIdentityInstance) { - this.metaStoreManagerFactory = metaStoreManagerFactory; - this.realmContext = realmContext; - this.securityIdentityInstance = securityIdentityInstance; - } - - @Override - public void reportMetric(String catalogName, TableIdentifier table, MetricsReport metricsReport) { - try { - String realmId = realmContext.getRealmIdentifier(); - String catalogId = catalogName; // Using catalog name as ID for now - String namespace = table.namespace().toString(); - - // Extract principal name from security context - String principalName = extractPrincipalName(); - String requestId = null; - - // Extract OpenTelemetry trace context from the current span - String otelTraceId = null; - String otelSpanId = null; - Span currentSpan = Span.current(); - if (currentSpan != null) { - SpanContext spanContext = currentSpan.getSpanContext(); - if (spanContext != null && spanContext.isValid()) { - otelTraceId = spanContext.getTraceId(); - otelSpanId = spanContext.getSpanId(); - LOGGER.trace( - "Captured OpenTelemetry context: traceId={}, spanId={}", otelTraceId, otelSpanId); - } - } - - // Get the persistence session for the current realm - BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); - - // Check if the session is a 
JdbcBasePersistenceImpl (supports metrics persistence) - if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { - LOGGER.warn( - "Metrics persistence is only supported with relational-jdbc backend. " - + "Current backend: {}. Logging metrics instead.", - session.getClass().getSimpleName()); - LOGGER.info("{}.{}: {}", catalogName, table, metricsReport); - return; - } - - if (metricsReport instanceof ScanReport scanReport) { - ModelScanMetricsReport modelReport = - MetricsReportConverter.fromScanReport( - scanReport, - realmId, - catalogId, - catalogName, - namespace, - principalName, - requestId, - otelTraceId, - otelSpanId); - jdbcPersistence.writeScanMetricsReport(modelReport); - LOGGER.debug( - "Persisted scan metrics report {} for table {}.{}", - modelReport.getReportId(), - catalogName, - table); - } else if (metricsReport instanceof CommitReport commitReport) { - ModelCommitMetricsReport modelReport = - MetricsReportConverter.fromCommitReport( - commitReport, - realmId, - catalogId, - catalogName, - namespace, - principalName, - requestId, - otelTraceId, - otelSpanId); - jdbcPersistence.writeCommitMetricsReport(modelReport); - LOGGER.debug( - "Persisted commit metrics report {} for table {}.{}", - modelReport.getReportId(), - catalogName, - table); - } else { - LOGGER.warn("Unknown metrics report type: {}", metricsReport.getClass().getName()); - } - } catch (Exception e) { - LOGGER.error( - "Failed to persist metrics report for table {}.{}: {}", - catalogName, - table, - e.getMessage(), - e); - } - } - - /** - * Extracts the principal name from the current security context. 
- * - * @return the principal name, or null if not available - */ - private String extractPrincipalName() { - try { - if (securityIdentityInstance.isResolvable()) { - SecurityIdentity identity = securityIdentityInstance.get(); - if (identity != null && !identity.isAnonymous()) { - Principal principal = identity.getPrincipal(); - if (principal != null) { - return principal.getName(); - } - } - } - } catch (Exception e) { - LOGGER.trace("Could not extract principal name from security context: {}", e.getMessage()); - } - return null; - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java b/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java deleted file mode 100644 index 7522347f28..0000000000 --- a/runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/ReportMetricsEventTest.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.catalog.iceberg; - -import static org.apache.polaris.service.admin.PolarisAuthzTestBase.SCHEMA; -import static org.assertj.core.api.Assertions.assertThat; - -import jakarta.ws.rs.core.Response; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.metrics.ImmutableScanReport; -import org.apache.iceberg.metrics.ScanMetrics; -import org.apache.iceberg.metrics.ScanMetricsResult; -import org.apache.iceberg.rest.requests.CreateNamespaceRequest; -import org.apache.iceberg.rest.requests.CreateTableRequest; -import org.apache.iceberg.rest.requests.ReportMetricsRequest; -import org.apache.polaris.core.admin.model.Catalog; -import org.apache.polaris.core.admin.model.CatalogProperties; -import org.apache.polaris.core.admin.model.CreateCatalogRequest; -import org.apache.polaris.core.admin.model.FileStorageConfigInfo; -import org.apache.polaris.core.admin.model.StorageConfigInfo; -import org.apache.polaris.service.TestServices; -import org.apache.polaris.service.events.EventAttributes; -import org.apache.polaris.service.events.PolarisEvent; -import org.apache.polaris.service.events.PolarisEventType; -import org.apache.polaris.service.events.listeners.TestPolarisEventListener; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -/** - * Unit tests for verifying that reportMetrics() emits BEFORE_REPORT_METRICS and - * AFTER_REPORT_METRICS events. 
- */ -public class ReportMetricsEventTest { - private static final String NAMESPACE = "test_ns"; - private static final String CATALOG = "test-catalog"; - private static final String TABLE = "test-table"; - - private String catalogLocation; - - @BeforeEach - public void setUp(@TempDir Path tempDir) { - catalogLocation = tempDir.toAbsolutePath().toUri().toString(); - if (catalogLocation.endsWith("/")) { - catalogLocation = catalogLocation.substring(0, catalogLocation.length() - 1); - } - } - - @Test - void testReportMetricsEmitsBeforeAndAfterEventsWhenEnabled() { - // Create test services with ENABLE_METRICS_EVENT_EMISSION enabled - TestServices testServices = createTestServicesWithMetricsEmissionEnabled(true); - createCatalogAndNamespace(testServices); - createTable(testServices, TABLE); - - // Create a ScanReport for testing - ImmutableScanReport scanReport = - ImmutableScanReport.builder() - .schemaId(0) - .tableName(NAMESPACE + "." + TABLE) - .snapshotId(100L) - .addProjectedFieldIds(1) - .addProjectedFieldNames("id") - .filter(Expressions.alwaysTrue()) - .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) - .build(); - - ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); - - // Call reportMetrics - try (Response response = - testServices - .restApi() - .reportMetrics( - CATALOG, - NAMESPACE, - TABLE, - request, - testServices.realmContext(), - testServices.securityContext())) { - assertThat(response.getStatus()).isEqualTo(Response.Status.NO_CONTENT.getStatusCode()); - } - - // Verify that BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events were emitted - TestPolarisEventListener testEventListener = - (TestPolarisEventListener) testServices.polarisEventListener(); - - PolarisEvent beforeEvent = testEventListener.getLatest(PolarisEventType.BEFORE_REPORT_METRICS); - assertThat(beforeEvent).isNotNull(); - assertThat(beforeEvent.attributes().getRequired(EventAttributes.CATALOG_NAME)) - .isEqualTo(CATALOG); - 
assertThat(beforeEvent.attributes().getRequired(EventAttributes.NAMESPACE)) - .isEqualTo(Namespace.of(NAMESPACE)); - assertThat(beforeEvent.attributes().getRequired(EventAttributes.TABLE_NAME)).isEqualTo(TABLE); - assertThat(beforeEvent.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST)) - .isNotNull(); - - PolarisEvent afterEvent = testEventListener.getLatest(PolarisEventType.AFTER_REPORT_METRICS); - assertThat(afterEvent).isNotNull(); - assertThat(afterEvent.attributes().getRequired(EventAttributes.CATALOG_NAME)) - .isEqualTo(CATALOG); - assertThat(afterEvent.attributes().getRequired(EventAttributes.NAMESPACE)) - .isEqualTo(Namespace.of(NAMESPACE)); - assertThat(afterEvent.attributes().getRequired(EventAttributes.TABLE_NAME)).isEqualTo(TABLE); - assertThat(afterEvent.attributes().getRequired(EventAttributes.REPORT_METRICS_REQUEST)) - .isNotNull(); - } - - @Test - void testReportMetricsDoesNotEmitEventsWhenDisabled() { - // Create test services with ENABLE_METRICS_EVENT_EMISSION disabled (default) - TestServices testServices = createTestServicesWithMetricsEmissionEnabled(false); - createCatalogAndNamespace(testServices); - createTable(testServices, TABLE); - - // Create a ScanReport for testing - ImmutableScanReport scanReport = - ImmutableScanReport.builder() - .schemaId(0) - .tableName(NAMESPACE + "." 
+ TABLE) - .snapshotId(100L) - .addProjectedFieldIds(1) - .addProjectedFieldNames("id") - .filter(Expressions.alwaysTrue()) - .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) - .build(); - - ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); - - // Call reportMetrics - try (Response response = - testServices - .restApi() - .reportMetrics( - CATALOG, - NAMESPACE, - TABLE, - request, - testServices.realmContext(), - testServices.securityContext())) { - assertThat(response.getStatus()).isEqualTo(Response.Status.NO_CONTENT.getStatusCode()); - } - - // Verify that BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS events were NOT emitted - TestPolarisEventListener testEventListener = - (TestPolarisEventListener) testServices.polarisEventListener(); - - assertThat(testEventListener.hasEvent(PolarisEventType.BEFORE_REPORT_METRICS)).isFalse(); - assertThat(testEventListener.hasEvent(PolarisEventType.AFTER_REPORT_METRICS)).isFalse(); - } - - private TestServices createTestServicesWithMetricsEmissionEnabled(boolean enabled) { - Map config = - Map.of( - "ALLOW_INSECURE_STORAGE_TYPES", - "true", - "SUPPORTED_CATALOG_STORAGE_TYPES", - List.of("FILE"), - "ENABLE_METRICS_EVENT_EMISSION", - String.valueOf(enabled)); - return TestServices.builder().config(config).withEventDelegator(true).build(); - } - - private void createCatalogAndNamespace(TestServices services) { - CatalogProperties.Builder propertiesBuilder = - CatalogProperties.builder() - .setDefaultBaseLocation(String.format("%s/%s", catalogLocation, CATALOG)); - - StorageConfigInfo config = - FileStorageConfigInfo.builder() - .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) - .build(); - Catalog catalogObject = - new Catalog( - Catalog.TypeEnum.INTERNAL, CATALOG, propertiesBuilder.build(), 0L, 0L, 1, config); - try (Response response = - services - .catalogsApi() - .createCatalog( - new CreateCatalogRequest(catalogObject), - services.realmContext(), - services.securityContext())) { - 
assertThat(response.getStatus()).isEqualTo(Response.Status.CREATED.getStatusCode()); - } - - CreateNamespaceRequest createNamespaceRequest = - CreateNamespaceRequest.builder().withNamespace(Namespace.of(NAMESPACE)).build(); - try (Response response = - services - .restApi() - .createNamespace( - CATALOG, - createNamespaceRequest, - services.realmContext(), - services.securityContext())) { - assertThat(response.getStatus()).isEqualTo(Response.Status.OK.getStatusCode()); - } - } - - private void createTable(TestServices services, String tableName) { - CreateTableRequest createTableRequest = - CreateTableRequest.builder() - .withName(tableName) - .withLocation( - String.format("%s/%s/%s/%s", catalogLocation, CATALOG, NAMESPACE, tableName)) - .withSchema(SCHEMA) - .build(); - services - .restApi() - .createTable( - CATALOG, - NAMESPACE, - createTableRequest, - null, - services.realmContext(), - services.securityContext()); - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java deleted file mode 100644 index 290f09ec8f..0000000000 --- a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/PolarisPersistenceEventListenerTest.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.events.listeners; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatCode; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.google.common.collect.ImmutableMap; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.ImmutableScanReport; -import org.apache.iceberg.metrics.ScanMetrics; -import org.apache.iceberg.metrics.ScanMetricsResult; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.iceberg.rest.requests.ReportMetricsRequest; -import org.apache.polaris.service.events.AttributeMap; -import org.apache.polaris.service.events.EventAttributes; -import org.apache.polaris.service.events.ImmutablePolarisEventMetadata; -import org.apache.polaris.service.events.PolarisEvent; -import org.apache.polaris.service.events.PolarisEventMetadata; -import org.apache.polaris.service.events.PolarisEventType; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Unit tests for {@link PolarisPersistenceEventListener} focusing on null-safety in metrics - * extraction. 
- */ -class PolarisPersistenceEventListenerTest { - - private TestPolarisPersistenceEventListener listener; - - @BeforeEach - void setUp() { - listener = new TestPolarisPersistenceEventListener(); - } - - @Test - void testScanReportWithNullMetadataValues() { - // Use mocks to simulate a ScanReport with null values in metadata - // (Iceberg's ImmutableScanReport.Builder doesn't allow nulls, but JSON deserialization might) - Map metadataWithNull = new HashMap<>(); - metadataWithNull.put("trace-id", "valid-trace-id"); - metadataWithNull.put("null-value-key", null); - - ScanReport mockScanReport = mock(ScanReport.class); - when(mockScanReport.snapshotId()).thenReturn(123L); - when(mockScanReport.schemaId()).thenReturn(0); - when(mockScanReport.metadata()).thenReturn(metadataWithNull); - when(mockScanReport.scanMetrics()).thenReturn(null); - - ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); - when(mockRequest.report()).thenReturn(mockScanReport); - - PolarisEvent event = createAfterReportMetricsEvent(mockRequest); - - // Should not throw NPE - assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); - - // Verify the valid metadata entry was captured, nulls were skipped - org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); - assertThat(persistedEvent).isNotNull(); - Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps).containsEntry("report.trace-id", "valid-trace-id"); - assertThat(additionalProps).doesNotContainKey("report.null-value-key"); - } - - @Test - void testCommitReportWithNullOperation() { - // Use mock to simulate a CommitReport with null operation - CommitReport mockCommitReport = mock(CommitReport.class); - when(mockCommitReport.snapshotId()).thenReturn(456L); - when(mockCommitReport.sequenceNumber()).thenReturn(1L); - when(mockCommitReport.operation()).thenReturn(null); // null operation - 
when(mockCommitReport.metadata()).thenReturn(ImmutableMap.of()); - when(mockCommitReport.commitMetrics()).thenReturn(null); - - ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); - when(mockRequest.report()).thenReturn(mockCommitReport); - - PolarisEvent event = createAfterReportMetricsEvent(mockRequest); - - // Should not throw NPE - assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); - - // Verify operation is not in additional properties (since it was null) - org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); - assertThat(persistedEvent).isNotNull(); - Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps) - .containsEntry("report_type", "commit") - .containsEntry("snapshot_id", "456") - .doesNotContainKey("operation"); - } - - @Test - void testCommitReportWithNullMetadataValues() { - // Use mock to simulate a CommitReport with null values in metadata - Map metadataWithNull = new HashMap<>(); - metadataWithNull.put("trace-id", "commit-trace-id"); - metadataWithNull.put("null-value-key", null); - - CommitReport mockCommitReport = mock(CommitReport.class); - when(mockCommitReport.snapshotId()).thenReturn(789L); - when(mockCommitReport.sequenceNumber()).thenReturn(2L); - when(mockCommitReport.operation()).thenReturn("append"); - when(mockCommitReport.metadata()).thenReturn(metadataWithNull); - when(mockCommitReport.commitMetrics()).thenReturn(null); - - ReportMetricsRequest mockRequest = mock(ReportMetricsRequest.class); - when(mockRequest.report()).thenReturn(mockCommitReport); - - PolarisEvent event = createAfterReportMetricsEvent(mockRequest); - - // Should not throw NPE - assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); - - // Verify valid entries are captured, nulls are skipped - org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); - assertThat(persistedEvent).isNotNull(); - Map 
additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps) - .containsEntry("report.trace-id", "commit-trace-id") - .containsEntry("operation", "append") - .doesNotContainKey("report.null-value-key"); - } - - @Test - void testScanReportWithEmptyMetadata() { - ImmutableScanReport scanReport = - ImmutableScanReport.builder() - .schemaId(0) - .tableName("test_ns.test_table") - .snapshotId(100L) - .addProjectedFieldIds(1) - .addProjectedFieldNames("id") - .filter(Expressions.alwaysTrue()) - .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) - // Empty metadata map - .build(); - - ReportMetricsRequest request = ReportMetricsRequest.of(scanReport); - PolarisEvent event = createAfterReportMetricsEvent(request); - - // Should not throw any exception - assertThatCode(() -> listener.onEvent(event)).doesNotThrowAnyException(); - - org.apache.polaris.core.entity.PolarisEvent persistedEvent = listener.getLastEvent(); - assertThat(persistedEvent).isNotNull(); - Map additionalProps = persistedEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps) - .containsEntry("report_type", "scan") - .containsEntry("snapshot_id", "100"); - } - - private PolarisEvent createAfterReportMetricsEvent(ReportMetricsRequest request) { - PolarisEventMetadata metadata = - ImmutablePolarisEventMetadata.builder() - .realmId("test-realm") - .requestId("test-request-id") - .openTelemetryContext(ImmutableMap.of()) - .build(); - - AttributeMap attributes = - new AttributeMap() - .put(EventAttributes.CATALOG_NAME, "test-catalog") - .put(EventAttributes.NAMESPACE, Namespace.of("test_ns")) - .put(EventAttributes.TABLE_NAME, "test_table") - .put(EventAttributes.REPORT_METRICS_REQUEST, request); - - return new PolarisEvent(PolarisEventType.AFTER_REPORT_METRICS, metadata, attributes); - } - - /** Concrete test implementation that captures persisted events for verification. 
*/ - private static class TestPolarisPersistenceEventListener extends PolarisPersistenceEventListener { - private final Map events = - new ConcurrentHashMap<>(); - private org.apache.polaris.core.entity.PolarisEvent lastEvent; - - @Override - protected void processEvent(String realmId, org.apache.polaris.core.entity.PolarisEvent event) { - events.put(event.getId(), event); - lastEvent = event; - } - - public org.apache.polaris.core.entity.PolarisEvent getLastEvent() { - return lastEvent; - } - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java index 05f6a1d54f..c3709ea77d 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java @@ -100,7 +100,6 @@ public Map getConfigOverrides() { .put("polaris.features.\"ALLOW_INSECURE_STORAGE_TYPES\"", "true") .put("polaris.features.\"SUPPORTED_CATALOG_STORAGE_TYPES\"", "[\"FILE\",\"S3\"]") .put("polaris.features.\"ALLOW_OVERLAPPING_CATALOG_URLS\"", "true") - .put("polaris.features.\"ENABLE_METRICS_EVENT_EMISSION\"", "true") .put("polaris.readiness.ignore-severe-issues", "true") .build(); } @@ -252,351 +251,4 @@ void testCreateCatalogAndTable() throws IOException { .hasEntrySatisfying("otel.trace_id", value -> assertThat(value).matches("[0-9a-f]{32}")) .hasEntrySatisfying("otel.span_id", value -> assertThat(value).matches("[0-9a-f]{16}")); } - - /** - * Tests that reportMetrics events are emitted with proper trace context for correlation. This - * verifies that compute engine metrics reports can be correlated with other catalog operations - * via the OpenTelemetry trace_id. 
- */ - @Test - void testReportMetricsEventWithTraceContext() throws IOException { - String catalogName = client.newEntityName("testReportMetrics"); - // Use a unique base location for this catalog to avoid overlap with other catalogs - URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); - - Catalog catalog = - PolarisCatalog.builder() - .setName(catalogName) - .setType(Catalog.TypeEnum.INTERNAL) - .setProperties(CatalogProperties.builder("file:///tmp/").build()) - .setStorageConfigInfo( - FileStorageConfigInfo.builder() - .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) - .setAllowedLocations(List.of(catalogBaseLocation.toString())) - .build()) - .build(); - - try (Response response = - managementApi - .request("v1/catalogs") - .header("X-Request-ID", "metrics-test") - .post(Entity.json(catalog))) { - assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); - } - - // Create a table first - try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { - sessionCatalog.initialize( - "polaris_catalog_metrics_test", - ImmutableMap.builder() - .put("uri", endpoints.catalogApiEndpoint().toString()) - .put(OAuth2Properties.TOKEN, authToken) - .put("warehouse", catalogName) - .putAll(endpoints.extraHeaders("header.")) - .build()); - - SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); - Namespace ns = Namespace.of("metrics_ns"); - sessionCatalog.createNamespace(sessionContext, ns); - - sessionCatalog - .buildTable( - sessionContext, - TableIdentifier.of(ns, "metrics_table"), - new Schema(List.of(Types.NestedField.required(1, "id", Types.IntegerType.get())))) - .withSortOrder(SortOrder.unsorted()) - .withPartitionSpec(PartitionSpec.unpartitioned()) - .create(); - } - - // Now send a metrics report via the REST API - // Build a minimal ScanReport for testing - ScanReport scanReport = - ImmutableScanReport.builder() - .schemaId(0) - .tableName("metrics_ns.metrics_table") 
- .snapshotId(-1L) - .addProjectedFieldIds(1) - .addProjectedFieldNames("id") - .filter(Expressions.alwaysTrue()) - .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) - .build(); - - ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(scanReport); - - RestApi catalogApi = client.catalogApi(authToken); - try (Response response = - catalogApi - .request("v1/" + catalogName + "/namespaces/metrics_ns/tables/metrics_table/metrics") - .header("X-Request-ID", "metrics-report-123") - .post(Entity.json(metricsRequest))) { - assertThat(response).returns(204, Response::getStatus); - } - - // Query for the AFTER_REPORT_METRICS event - String query = - "SELECT * FROM polaris_schema.events WHERE realm_id = '" - + realm - + "' AND event_type = 'AFTER_REPORT_METRICS' ORDER BY timestamp_ms DESC"; - - List metricsEvents = - await() - .atMost(Duration.ofSeconds(10)) - .until( - () -> { - ImmutableList.Builder e = ImmutableList.builder(); - try (Connection connection = dataSource.get().getConnection(); - Statement statement = connection.createStatement(); - ResultSet rs = statement.executeQuery(query)) { - while (rs.next()) { - PolarisEvent event = CONVERTER.fromResultSet(rs); - e.add(event); - } - } - return e.build(); - }, - e -> !e.isEmpty()); - - PolarisEvent metricsEvent = metricsEvents.getFirst(); - assertThat(metricsEvent.getCatalogId()).isEqualTo(catalogName); - assertThat(metricsEvent.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); - assertThat(metricsEvent.getResourceIdentifier()).isEqualTo("metrics_ns.metrics_table"); - assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); - assertThat(metricsEvent.getPrincipalName()).isEqualTo("root"); - assertThat(metricsEvent.getRequestId()).isEqualTo("metrics-report-123"); - - // Verify OpenTelemetry trace context is present for correlation - assertThat(metricsEvent.getAdditionalPropertiesAsMap()) - .containsEntry("otel.trace_flags", "01") - .containsEntry("otel.sampled", 
"true") - .hasEntrySatisfying("otel.trace_id", value -> assertThat(value).matches("[0-9a-f]{32}")) - .hasEntrySatisfying("otel.span_id", value -> assertThat(value).matches("[0-9a-f]{16}")); - } - - /** - * Tests that ScanReport with trace-id in metadata is properly extracted and stored. This verifies - * that compute engines can pass trace context in the report's metadata map for correlation. - */ - @Test - void testReportMetricsWithTraceIdInMetadata() throws IOException { - String catalogName = client.newEntityName("testMetricsTraceId"); - // Use a unique base location for this catalog to avoid overlap with other catalogs - URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); - - Catalog catalog = - PolarisCatalog.builder() - .setName(catalogName) - .setType(Catalog.TypeEnum.INTERNAL) - .setProperties(CatalogProperties.builder("file:///tmp/").build()) - .setStorageConfigInfo( - FileStorageConfigInfo.builder() - .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) - .setAllowedLocations(List.of(catalogBaseLocation.toString())) - .build()) - .build(); - - try (Response response = managementApi.request("v1/catalogs").post(Entity.json(catalog))) { - assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); - } - - // Create a table first - try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { - sessionCatalog.initialize( - "polaris_catalog_trace_test", - ImmutableMap.builder() - .put("uri", endpoints.catalogApiEndpoint().toString()) - .put(OAuth2Properties.TOKEN, authToken) - .put("warehouse", catalogName) - .putAll(endpoints.extraHeaders("header.")) - .build()); - - SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); - Namespace ns = Namespace.of("trace_ns"); - sessionCatalog.createNamespace(sessionContext, ns); - - sessionCatalog - .buildTable( - sessionContext, - TableIdentifier.of(ns, "trace_table"), - new Schema(List.of(Types.NestedField.required(1, "id", 
Types.IntegerType.get())))) - .withSortOrder(SortOrder.unsorted()) - .withPartitionSpec(PartitionSpec.unpartitioned()) - .create(); - } - - // Build a ScanReport with trace-id in metadata (as compute engines would do) - String clientTraceId = "abcdef1234567890abcdef1234567890"; - ScanReport scanReport = - ImmutableScanReport.builder() - .schemaId(0) - .tableName("trace_ns.trace_table") - .snapshotId(123L) - .addProjectedFieldIds(1) - .addProjectedFieldNames("id") - .filter(Expressions.alwaysTrue()) - .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) - .putMetadata("trace-id", clientTraceId) - .putMetadata("client-app", "spark-test") - .build(); - - ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(scanReport); - - RestApi catalogApi = client.catalogApi(authToken); - try (Response response = - catalogApi - .request("v1/" + catalogName + "/namespaces/trace_ns/tables/trace_table/metrics") - .header("X-Request-ID", "trace-test-456") - .post(Entity.json(metricsRequest))) { - assertThat(response).returns(204, Response::getStatus); - } - - // Query for the AFTER_REPORT_METRICS event - String query = - "SELECT * FROM polaris_schema.events WHERE realm_id = '" - + realm - + "' AND event_type = 'AFTER_REPORT_METRICS' AND request_id = 'trace-test-456'"; - - List metricsEvents = - await() - .atMost(Duration.ofSeconds(10)) - .until( - () -> { - ImmutableList.Builder e = ImmutableList.builder(); - try (Connection connection = dataSource.get().getConnection(); - Statement statement = connection.createStatement(); - ResultSet rs = statement.executeQuery(query)) { - while (rs.next()) { - PolarisEvent event = CONVERTER.fromResultSet(rs); - e.add(event); - } - } - return e.build(); - }, - e -> !e.isEmpty()); - - PolarisEvent metricsEvent = metricsEvents.getFirst(); - assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); - - // Verify trace-id from report metadata is extracted with "report." 
prefix - Map additionalProps = metricsEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps) - .containsEntry("report.trace-id", clientTraceId) - .containsEntry("report.client-app", "spark-test") - .containsEntry("report_type", "scan") - .containsEntry("snapshot_id", "123") - .containsEntry("schema_id", "0"); - } - - /** - * Tests that CommitReport metrics are properly extracted and stored. This verifies the commit - * metrics path including operation type, sequence number, and commit metrics. - */ - @Test - void testReportCommitMetrics() throws IOException { - String catalogName = client.newEntityName("testCommitMetrics"); - // Use a unique base location for this catalog to avoid overlap with other catalogs - URI catalogBaseLocation = baseLocation.resolve(catalogName + "/"); - - Catalog catalog = - PolarisCatalog.builder() - .setName(catalogName) - .setType(Catalog.TypeEnum.INTERNAL) - .setProperties(CatalogProperties.builder("file:///tmp/").build()) - .setStorageConfigInfo( - FileStorageConfigInfo.builder() - .setStorageType(StorageConfigInfo.StorageTypeEnum.FILE) - .setAllowedLocations(List.of(catalogBaseLocation.toString())) - .build()) - .build(); - - try (Response response = managementApi.request("v1/catalogs").post(Entity.json(catalog))) { - assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); - } - - // Create a table first - try (RESTSessionCatalog sessionCatalog = new RESTSessionCatalog()) { - sessionCatalog.initialize( - "polaris_catalog_commit_test", - ImmutableMap.builder() - .put("uri", endpoints.catalogApiEndpoint().toString()) - .put(OAuth2Properties.TOKEN, authToken) - .put("warehouse", catalogName) - .putAll(endpoints.extraHeaders("header.")) - .build()); - - SessionCatalog.SessionContext sessionContext = SessionCatalog.SessionContext.createEmpty(); - Namespace ns = Namespace.of("commit_ns"); - sessionCatalog.createNamespace(sessionContext, ns); - - sessionCatalog - .buildTable( - sessionContext, 
- TableIdentifier.of(ns, "commit_table"), - new Schema(List.of(Types.NestedField.required(1, "id", Types.IntegerType.get())))) - .withSortOrder(SortOrder.unsorted()) - .withPartitionSpec(PartitionSpec.unpartitioned()) - .create(); - } - - // Build a CommitReport - org.apache.iceberg.metrics.CommitReport commitReport = - org.apache.iceberg.metrics.ImmutableCommitReport.builder() - .tableName("commit_ns.commit_table") - .snapshotId(456L) - .sequenceNumber(1L) - .operation("append") - .commitMetrics( - org.apache.iceberg.metrics.CommitMetricsResult.from( - org.apache.iceberg.metrics.CommitMetrics.noop(), ImmutableMap.of())) - .putMetadata("trace-id", "commit-trace-123") - .build(); - - ReportMetricsRequest metricsRequest = ReportMetricsRequest.of(commitReport); - - RestApi catalogApi = client.catalogApi(authToken); - try (Response response = - catalogApi - .request("v1/" + catalogName + "/namespaces/commit_ns/tables/commit_table/metrics") - .header("X-Request-ID", "commit-test-789") - .post(Entity.json(metricsRequest))) { - assertThat(response).returns(204, Response::getStatus); - } - - // Query for the AFTER_REPORT_METRICS event - String query = - "SELECT * FROM polaris_schema.events WHERE realm_id = '" - + realm - + "' AND event_type = 'AFTER_REPORT_METRICS' AND request_id = 'commit-test-789'"; - - List metricsEvents = - await() - .atMost(Duration.ofSeconds(10)) - .until( - () -> { - ImmutableList.Builder e = ImmutableList.builder(); - try (Connection connection = dataSource.get().getConnection(); - Statement statement = connection.createStatement(); - ResultSet rs = statement.executeQuery(query)) { - while (rs.next()) { - PolarisEvent event = CONVERTER.fromResultSet(rs); - e.add(event); - } - } - return e.build(); - }, - e -> !e.isEmpty()); - - PolarisEvent metricsEvent = metricsEvents.getFirst(); - assertThat(metricsEvent.getEventType()).isEqualTo("AFTER_REPORT_METRICS"); - assertThat(metricsEvent.getResourceIdentifier()).isEqualTo("commit_ns.commit_table"); - - 
// Verify commit report data is extracted - Map additionalProps = metricsEvent.getAdditionalPropertiesAsMap(); - assertThat(additionalProps) - .containsEntry("report_type", "commit") - .containsEntry("snapshot_id", "456") - .containsEntry("sequence_number", "1") - .containsEntry("operation", "append") - .containsEntry("report.trace-id", "commit-trace-123"); - } } diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java deleted file mode 100644 index a4e67248be..0000000000 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/CompositeMetricsReporterTest.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; - -import java.util.List; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.ScanReport; -import org.junit.jupiter.api.Test; - -class CompositeMetricsReporterTest { - - @Test - void testDelegatesToAllReporters() { - PolarisMetricsReporter reporter1 = mock(PolarisMetricsReporter.class); - PolarisMetricsReporter reporter2 = mock(PolarisMetricsReporter.class); - PolarisMetricsReporter reporter3 = mock(PolarisMetricsReporter.class); - - CompositeMetricsReporter composite = - new CompositeMetricsReporter(List.of(reporter1, reporter2, reporter3)); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - composite.reportMetric("test-catalog", table, scanReport); - - verify(reporter1).reportMetric("test-catalog", table, scanReport); - verify(reporter2).reportMetric("test-catalog", table, scanReport); - verify(reporter3).reportMetric("test-catalog", table, scanReport); - } - - @Test - void testContinuesOnDelegateFailure() { - PolarisMetricsReporter reporter1 = mock(PolarisMetricsReporter.class); - PolarisMetricsReporter reporter2 = mock(PolarisMetricsReporter.class); - PolarisMetricsReporter reporter3 = mock(PolarisMetricsReporter.class); - - // Make reporter2 throw an exception - doThrow(new RuntimeException("Reporter 2 failed")) - .when(reporter2) - .reportMetric(any(), any(), any()); - - CompositeMetricsReporter composite = - new CompositeMetricsReporter(List.of(reporter1, reporter2, reporter3)); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - // Should not throw - 
composite.reportMetric("test-catalog", table, scanReport); - - // All reporters should still be called - verify(reporter1).reportMetric("test-catalog", table, scanReport); - verify(reporter2).reportMetric("test-catalog", table, scanReport); - verify(reporter3).reportMetric("test-catalog", table, scanReport); - } - - @Test - void testEmptyDelegatesList() { - CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of()); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - // Should not throw - composite.reportMetric("test-catalog", table, scanReport); - - assertThat(composite.getDelegates()).isEmpty(); - } - - @Test - void testSingleDelegate() { - PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); - CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - composite.reportMetric("test-catalog", table, scanReport); - - verify(reporter).reportMetric("test-catalog", table, scanReport); - assertThat(composite.getDelegates()).hasSize(1); - } - - @Test - void testGetDelegatesReturnsUnmodifiableList() { - PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); - CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); - - List delegates = composite.getDelegates(); - - // Should be unmodifiable - assertThat(delegates).hasSize(1); - org.junit.jupiter.api.Assertions.assertThrows( - UnsupportedOperationException.class, - () -> delegates.add(mock(PolarisMetricsReporter.class))); - } - - @Test - void testNullMetricsReportDoesNotThrow() { - PolarisMetricsReporter reporter = mock(PolarisMetricsReporter.class); - CompositeMetricsReporter composite = new CompositeMetricsReporter(List.of(reporter)); - - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - // Should not throw 
even with null report - composite.reportMetric("test-catalog", table, null); - - verify(reporter).reportMetric(eq("test-catalog"), eq(table), eq(null)); - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java index 8762c3ed74..a8b66a677b 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java @@ -44,3 +44,4 @@ void testLogging() { verify(mockConsumer).accept(warehouse, table, metricsReport, receivedTimestamp); } } + diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java deleted file mode 100644 index c36d8a76c9..0000000000 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/EventsMetricsReporterTest.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.fasterxml.jackson.databind.ObjectMapper; -import io.quarkus.security.identity.SecurityIdentity; -import jakarta.enterprise.inject.Instance; -import java.util.List; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.MetricsReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.entity.PolarisEvent; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; - -class EventsMetricsReporterTest { - - private MetaStoreManagerFactory metaStoreManagerFactory; - private RealmContext realmContext; - private BasePersistence persistence; - private ObjectMapper objectMapper; - - @SuppressWarnings("unchecked") - private Instance securityIdentityInstance = mock(Instance.class); - - private EventsMetricsReporter reporter; - - @BeforeEach - void setUp() { - metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); - realmContext = mock(RealmContext.class); - persistence = mock(BasePersistence.class); - objectMapper = new ObjectMapper(); - - when(realmContext.getRealmIdentifier()).thenReturn("test-realm"); - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(persistence); - when(securityIdentityInstance.isResolvable()).thenReturn(false); - - reporter = - new EventsMetricsReporter( - metaStoreManagerFactory, realmContext, objectMapper, securityIdentityInstance); - } - - @Test - 
void testReportScanMetrics() { - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, scanReport); - - @SuppressWarnings("unchecked") - ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); - verify(persistence).writeEvents(captor.capture()); - - List events = captor.getValue(); - assertThat(events).hasSize(1); - - PolarisEvent event = events.get(0); - assertThat(event.getEventType()).isEqualTo(EventsMetricsReporter.EVENT_TYPE_SCAN_REPORT); - assertThat(event.getCatalogId()).isEqualTo("test-catalog"); - assertThat(event.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); - assertThat(event.getResourceIdentifier()).isEqualTo("db.test_table"); - } - - @Test - void testReportCommitMetrics() { - CommitReport commitReport = mock(CommitReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, commitReport); - - @SuppressWarnings("unchecked") - ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); - verify(persistence).writeEvents(captor.capture()); - - List events = captor.getValue(); - assertThat(events).hasSize(1); - - PolarisEvent event = events.get(0); - assertThat(event.getEventType()).isEqualTo(EventsMetricsReporter.EVENT_TYPE_COMMIT_REPORT); - assertThat(event.getCatalogId()).isEqualTo("test-catalog"); - assertThat(event.getResourceType()).isEqualTo(PolarisEvent.ResourceType.TABLE); - assertThat(event.getResourceIdentifier()).isEqualTo("db.test_table"); - } - - @Test - void testUnknownMetricsReportTypeIsIgnored() { - MetricsReport unknownReport = mock(MetricsReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, unknownReport); - - verify(persistence, never()).writeEvents(any()); - } - - @Test - void testEventContainsSerializedMetrics() { - // Create a mock ScanReport - ScanReport 
scanReport = mock(ScanReport.class); - when(scanReport.tableName()).thenReturn("test_table"); - when(scanReport.snapshotId()).thenReturn(12345L); - - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, scanReport); - - @SuppressWarnings("unchecked") - ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); - verify(persistence).writeEvents(captor.capture()); - - PolarisEvent event = captor.getValue().get(0); - String additionalProps = event.getAdditionalProperties(); - // Should contain JSON (at minimum an empty object or serialized report) - assertThat(additionalProps).isNotNull(); - } - - @Test - void testPersistenceErrorDoesNotThrow() { - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - when(metaStoreManagerFactory.getOrCreateSession(any())) - .thenThrow(new RuntimeException("Database error")); - - // Should not throw - reporter.reportMetric("test-catalog", table, scanReport); - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java deleted file mode 100644 index 64740c5bfa..0000000000 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import io.quarkus.security.identity.SecurityIdentity; -import jakarta.enterprise.inject.Instance; -import java.security.Principal; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.MetricsReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; -import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; - -class PersistingMetricsReporterTest { - - private MetaStoreManagerFactory metaStoreManagerFactory; - private RealmContext realmContext; - private JdbcBasePersistenceImpl jdbcPersistence; - private BasePersistence nonJdbcPersistence; - - @SuppressWarnings("unchecked") - private Instance securityIdentityInstance = mock(Instance.class); - - private PersistingMetricsReporter reporter; - - @BeforeEach - void setUp() { 
- metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); - realmContext = mock(RealmContext.class); - jdbcPersistence = mock(JdbcBasePersistenceImpl.class); - nonJdbcPersistence = mock(BasePersistence.class); - - when(realmContext.getRealmIdentifier()).thenReturn("test-realm"); - when(securityIdentityInstance.isResolvable()).thenReturn(false); - - reporter = - new PersistingMetricsReporter( - metaStoreManagerFactory, realmContext, securityIdentityInstance); - } - - @Test - void testReportScanMetricsWithJdbcBackend() { - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); - - ScanReport scanReport = mock(ScanReport.class); - when(scanReport.tableName()).thenReturn("test_table"); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, scanReport); - - verify(jdbcPersistence).writeScanMetricsReport(any(ModelScanMetricsReport.class)); - } - - @Test - void testReportCommitMetricsWithJdbcBackend() { - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); - - CommitReport commitReport = mock(CommitReport.class); - when(commitReport.tableName()).thenReturn("test_table"); - when(commitReport.snapshotId()).thenReturn(12345L); - when(commitReport.operation()).thenReturn("append"); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, commitReport); - - verify(jdbcPersistence).writeCommitMetricsReport(any(ModelCommitMetricsReport.class)); - } - - @Test - void testFallbackToLoggingWithNonJdbcBackend() { - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(nonJdbcPersistence); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - // Should not throw, just log - reporter.reportMetric("test-catalog", table, scanReport); - - // Verify no JDBC methods were called - verify(jdbcPersistence, 
never()).writeScanMetricsReport(any()); - } - - @Test - void testUnknownMetricsReportTypeIsIgnored() { - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); - - MetricsReport unknownReport = mock(MetricsReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, unknownReport); - - verify(jdbcPersistence, never()).writeScanMetricsReport(any()); - verify(jdbcPersistence, never()).writeCommitMetricsReport(any()); - } - - @Test - void testPrincipalNameExtraction() { - when(metaStoreManagerFactory.getOrCreateSession(any())).thenReturn(jdbcPersistence); - - // Set up security identity with a principal - SecurityIdentity identity = mock(SecurityIdentity.class); - Principal principal = mock(Principal.class); - when(principal.getName()).thenReturn("test-user"); - when(identity.isAnonymous()).thenReturn(false); - when(identity.getPrincipal()).thenReturn(principal); - when(securityIdentityInstance.isResolvable()).thenReturn(true); - when(securityIdentityInstance.get()).thenReturn(identity); - - ScanReport scanReport = mock(ScanReport.class); - when(scanReport.tableName()).thenReturn("test_table"); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - reporter.reportMetric("test-catalog", table, scanReport); - - ArgumentCaptor captor = - ArgumentCaptor.forClass(ModelScanMetricsReport.class); - verify(jdbcPersistence).writeScanMetricsReport(captor.capture()); - - // The principal name should be captured in the report - // Note: The actual assertion depends on how the model is built - } - - @Test - void testPersistenceErrorDoesNotThrow() { - when(metaStoreManagerFactory.getOrCreateSession(any())) - .thenThrow(new RuntimeException("Database error")); - - ScanReport scanReport = mock(ScanReport.class); - TableIdentifier table = TableIdentifier.of("db", "test_table"); - - // Should not throw - reporter.reportMetric("test-catalog", table, scanReport); - } -} diff 
--git a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java index 40fca8c2c3..30303121e1 100644 --- a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java @@ -83,7 +83,6 @@ import org.apache.polaris.service.context.catalog.PolarisCallContextCatalogFactory; import org.apache.polaris.service.credentials.DefaultPolarisCredentialManager; import org.apache.polaris.service.credentials.connection.SigV4ConnectionCredentialVendor; -import org.apache.polaris.service.events.EventAttributeMap; import org.apache.polaris.service.events.PolarisEventMetadata; import org.apache.polaris.service.events.PolarisEventMetadataFactory; import org.apache.polaris.service.events.listeners.PolarisEventListener; @@ -335,7 +334,6 @@ public String getAuthenticationScheme() { Mockito.when(externalCatalogFactory.select(any())).thenReturn(externalCatalogFactory); Mockito.when(externalCatalogFactory.isUnsatisfied()).thenReturn(true); - EventAttributeMap eventAttributeMap = new EventAttributeMap(); IcebergCatalogAdapter catalogService = new IcebergCatalogAdapter( diagnostics, @@ -353,8 +351,7 @@ public String getAuthenticationScheme() { externalCatalogFactory, storageAccessConfigProvider, new DefaultMetricsReporter(), - Clock.systemUTC(), - eventAttributeMap); + Clock.systemUTC()); // Optionally wrap with event delegator IcebergRestCatalogApiService finalRestCatalogService = catalogService; @@ -365,9 +362,7 @@ public String getAuthenticationScheme() { catalogService, polarisEventListener, eventMetadataFactory, - new DefaultCatalogPrefixParser(), - eventAttributeMap, - realmConfig); + new DefaultCatalogPrefixParser()); finalRestConfigurationService = new IcebergRestConfigurationEventServiceDelegator( catalogService, polarisEventListener, eventMetadataFactory); From 
b86f67020f4ede3b4648607a47b12475a93d4efb Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 09:31:22 -0800 Subject: [PATCH 23/67] refactor: Replace event-based metrics with MetricsProcessor interface This commit removes the event-based metrics reporting system and introduces a new MetricsProcessor interface with CDI support. This is the foundation for a simpler, more direct metrics processing architecture. Changes: - Remove ENABLE_METRICS_EVENT_EMISSION feature flag - Remove BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS event types - Remove REPORT_METRICS_REQUEST event attribute - Remove event emission from IcebergRestCatalogEventServiceDelegator.reportMetrics() - Remove ReportMetricsEventTest - Add MetricsProcessor interface for processing metrics reports - Add MetricsProcessingContext with rich contextual information (realm ID, principal, request ID, OpenTelemetry trace context) - Add MetricsProcessorConfiguration for type-safe configuration - Add CDI producer in ServiceProducers for MetricsProcessor The new MetricsProcessor interface provides: - Simpler, more direct processing (no events) - Rich context with realm, principal, request ID, OTel trace - CDI-based extensibility via @Identifier annotations - Type-safe configuration Implementations will be added in subsequent PRs. This commit provides the foundational interfaces and CDI infrastructure. Backward compatibility: The existing PolarisMetricsReporter interface and configuration remain unchanged and functional. 
--- .polaris-work-notes.md | 18 +++ ...ebergRestCatalogEventServiceDelegator.java | 132 ++++++++---------- .../service/config/ServiceProducers.java | 40 ++++++ .../service/events/EventAttributes.java | 4 - 4 files changed, 113 insertions(+), 81 deletions(-) create mode 100644 .polaris-work-notes.md diff --git a/.polaris-work-notes.md b/.polaris-work-notes.md new file mode 100644 index 0000000000..cbd906acff --- /dev/null +++ b/.polaris-work-notes.md @@ -0,0 +1,18 @@ +# Polaris Work Notes + +## Active Branches + +### feat-3337-rest-catalog-metrics-table-merged +- **Remote**: `obelix74/polaris` (origin) +- **PR**: #3385 +- **Description**: Add metrics persistence with dual storage strategy for Iceberg table operations +- **Last Updated**: 2026-01-17 +- **Notes**: + - This branch is tracking `origin/feat-3337-rest-catalog-metrics-table-merged` + - Fixed failing tests by adding `ENABLE_METRICS_EVENT_EMISSION` feature flag + - To push: `git push origin feat-3337-rest-catalog-metrics-table-merged` + +## Git Remotes +- **origin**: https://github.com/obelix74/polaris.git (your fork) +- **upstream**: https://github.com/apache/polaris.git (Apache upstream) + diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index c5f526e756..3a8a35e9e2 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -26,8 +26,6 @@ import jakarta.inject.Inject; import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.SecurityContext; -import java.util.List; -import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.Namespace; import 
org.apache.iceberg.rest.requests.CommitTransactionRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; @@ -43,13 +41,11 @@ import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; -import org.apache.polaris.core.config.FeatureConfiguration; -import org.apache.polaris.core.config.RealmConfig; import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.service.catalog.CatalogPrefixParser; import org.apache.polaris.service.catalog.api.IcebergRestCatalogApiService; import org.apache.polaris.service.catalog.common.CatalogAdapter; -import org.apache.polaris.service.events.EventAttributeMap; +import org.apache.polaris.service.events.AttributeMap; import org.apache.polaris.service.events.EventAttributes; import org.apache.polaris.service.events.PolarisEvent; import org.apache.polaris.service.events.PolarisEventMetadataFactory; @@ -68,8 +64,6 @@ public class IcebergRestCatalogEventServiceDelegator @Inject PolarisEventListener polarisEventListener; @Inject PolarisEventMetadataFactory eventMetadataFactory; @Inject CatalogPrefixParser prefixParser; - @Inject EventAttributeMap eventAttributeMap; - @Inject RealmConfig realmConfig; // Constructor for testing - allows manual dependency injection @VisibleForTesting @@ -77,15 +71,11 @@ public IcebergRestCatalogEventServiceDelegator( IcebergCatalogAdapter delegate, PolarisEventListener polarisEventListener, PolarisEventMetadataFactory eventMetadataFactory, - CatalogPrefixParser prefixParser, - EventAttributeMap eventAttributeMap, - RealmConfig realmConfig) { + CatalogPrefixParser prefixParser) { this.delegate = delegate; this.polarisEventListener = polarisEventListener; this.eventMetadataFactory = eventMetadataFactory; this.prefixParser = prefixParser; - this.eventAttributeMap = eventAttributeMap; - this.realmConfig = realmConfig; } // Default constructor for 
CDI @@ -102,7 +92,7 @@ public Response createNamespace( new PolarisEvent( PolarisEventType.BEFORE_CREATE_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.CREATE_NAMESPACE_REQUEST, createNamespaceRequest))); Response resp = @@ -112,7 +102,7 @@ public Response createNamespace( new PolarisEvent( PolarisEventType.AFTER_CREATE_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, createNamespaceResponse.namespace()) .put(EventAttributes.NAMESPACE_PROPERTIES, createNamespaceResponse.properties()))); @@ -132,7 +122,7 @@ public Response listNamespaces( new PolarisEvent( PolarisEventType.BEFORE_LIST_NAMESPACES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.PARENT_NAMESPACE_FQN, parent))); Response resp = @@ -141,7 +131,7 @@ public Response listNamespaces( new PolarisEvent( PolarisEventType.AFTER_LIST_NAMESPACES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.PARENT_NAMESPACE_FQN, parent))); return resp; @@ -155,7 +145,7 @@ public Response loadNamespaceMetadata( new PolarisEvent( PolarisEventType.BEFORE_LOAD_NAMESPACE_METADATA, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, decodeNamespace(namespace)))); Response resp = @@ -165,7 +155,7 @@ public Response loadNamespaceMetadata( new PolarisEvent( PolarisEventType.AFTER_LOAD_NAMESPACE_METADATA, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, getNamespaceResponse.namespace()) 
.put(EventAttributes.NAMESPACE_PROPERTIES, getNamespaceResponse.properties()))); @@ -181,7 +171,7 @@ public Response namespaceExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = delegate.namespaceExists(prefix, namespace, realmContext, securityContext); @@ -189,7 +179,7 @@ public Response namespaceExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -203,7 +193,7 @@ public Response dropNamespace( new PolarisEvent( PolarisEventType.BEFORE_DROP_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, decodeNamespace(namespace)))); Response resp = delegate.dropNamespace(prefix, namespace, realmContext, securityContext); @@ -211,7 +201,7 @@ public Response dropNamespace( new PolarisEvent( PolarisEventType.AFTER_DROP_NAMESPACE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE_FQN, namespace))); return resp; @@ -230,7 +220,7 @@ public Response updateProperties( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_NAMESPACE_PROPERTIES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put( @@ -243,7 +233,7 @@ public Response updateProperties( new PolarisEvent( PolarisEventType.AFTER_UPDATE_NAMESPACE_PROPERTIES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, 
catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put( @@ -266,7 +256,7 @@ public Response createTable( new PolarisEvent( PolarisEventType.BEFORE_CREATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.CREATE_TABLE_REQUEST, createTableRequest) @@ -284,7 +274,7 @@ public Response createTable( new PolarisEvent( PolarisEventType.AFTER_CREATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, createTableRequest.name()) @@ -307,7 +297,7 @@ public Response listTables( new PolarisEvent( PolarisEventType.BEFORE_LIST_TABLES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = @@ -316,7 +306,7 @@ public Response listTables( new PolarisEvent( PolarisEventType.AFTER_LIST_TABLES, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -338,7 +328,7 @@ public Response loadTable( new PolarisEvent( PolarisEventType.BEFORE_LOAD_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -355,12 +345,11 @@ public Response loadTable( snapshots, realmContext, securityContext); - polarisEventListener.onEvent( new PolarisEvent( PolarisEventType.AFTER_LOAD_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) 
.put(EventAttributes.TABLE_NAME, table) @@ -381,7 +370,7 @@ public Response tableExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -390,7 +379,7 @@ public Response tableExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -411,7 +400,7 @@ public Response dropTable( new PolarisEvent( PolarisEventType.BEFORE_DROP_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -422,7 +411,7 @@ public Response dropTable( new PolarisEvent( PolarisEventType.AFTER_DROP_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -443,7 +432,7 @@ public Response registerTable( new PolarisEvent( PolarisEventType.BEFORE_REGISTER_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.REGISTER_TABLE_REQUEST, registerTableRequest))); @@ -454,7 +443,7 @@ public Response registerTable( new PolarisEvent( PolarisEventType.AFTER_REGISTER_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, 
registerTableRequest.name()) @@ -473,7 +462,7 @@ public Response renameTable( new PolarisEvent( PolarisEventType.BEFORE_RENAME_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); Response resp = delegate.renameTable(prefix, renameTableRequest, realmContext, securityContext); @@ -481,7 +470,7 @@ public Response renameTable( new PolarisEvent( PolarisEventType.AFTER_RENAME_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); return resp; @@ -501,7 +490,7 @@ public Response updateTable( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -513,14 +502,12 @@ public Response updateTable( new PolarisEvent( PolarisEventType.AFTER_UPDATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) .put(EventAttributes.UPDATE_TABLE_REQUEST, commitTableRequest) - .put( - EventAttributes.TABLE_METADATA, - ((LoadTableResponse) resp.getEntity()).tableMetadata()))); + .put(EventAttributes.LOAD_TABLE_RESPONSE, (LoadTableResponse) resp.getEntity()))); return resp; } @@ -537,7 +524,7 @@ public Response createView( new PolarisEvent( PolarisEventType.BEFORE_CREATE_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.CREATE_VIEW_REQUEST, createViewRequest))); @@ -547,7 +534,7 @@ public 
Response createView( new PolarisEvent( PolarisEventType.AFTER_CREATE_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, createViewRequest.name()) @@ -569,7 +556,7 @@ public Response listViews( new PolarisEvent( PolarisEventType.BEFORE_LIST_VIEWS, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = @@ -578,7 +565,7 @@ public Response listViews( new PolarisEvent( PolarisEventType.AFTER_LIST_VIEWS, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -597,7 +584,7 @@ public Response loadCredentials( new PolarisEvent( PolarisEventType.BEFORE_LOAD_CREDENTIALS, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -607,7 +594,7 @@ public Response loadCredentials( new PolarisEvent( PolarisEventType.AFTER_LOAD_CREDENTIALS, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -627,7 +614,7 @@ public Response loadView( new PolarisEvent( PolarisEventType.BEFORE_LOAD_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -636,7 +623,7 @@ public Response loadView( new PolarisEvent( PolarisEventType.AFTER_LOAD_VIEW, eventMetadataFactory.create(), - new 
EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -657,7 +644,7 @@ public Response viewExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -666,7 +653,7 @@ public Response viewExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -686,7 +673,7 @@ public Response dropView( new PolarisEvent( PolarisEventType.BEFORE_DROP_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -695,7 +682,7 @@ public Response dropView( new PolarisEvent( PolarisEventType.AFTER_DROP_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -713,7 +700,7 @@ public Response renameView( new PolarisEvent( PolarisEventType.BEFORE_RENAME_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); Response resp = delegate.renameView(prefix, renameTableRequest, realmContext, securityContext); @@ -721,7 +708,7 @@ public Response renameView( new PolarisEvent( PolarisEventType.AFTER_RENAME_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new 
AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); return resp; @@ -741,7 +728,7 @@ public Response replaceView( new PolarisEvent( PolarisEventType.BEFORE_REPLACE_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -753,7 +740,7 @@ public Response replaceView( new PolarisEvent( PolarisEventType.AFTER_REPLACE_VIEW, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -773,7 +760,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.BEFORE_COMMIT_TRANSACTION, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.COMMIT_TRANSACTION_REQUEST, commitTransactionRequest))); for (UpdateTableRequest req : commitTransactionRequest.tableChanges()) { @@ -781,7 +768,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, req.identifier().namespace()) .put(EventAttributes.TABLE_NAME, req.identifier().name()) @@ -793,27 +780,19 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.AFTER_COMMIT_TRANSACTION, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.COMMIT_TRANSACTION_REQUEST, commitTransactionRequest))); - List tableMetadataList = - eventAttributeMap.getRequired(EventAttributes.TABLE_METADATAS); - for (int i = 0; i < 
commitTransactionRequest.tableChanges().size(); i++) { - UpdateTableRequest req = commitTransactionRequest.tableChanges().get(i); - TableMetadata tableMetadata = - tableMetadataList != null && i < tableMetadataList.size() - ? tableMetadataList.get(i) - : null; + for (UpdateTableRequest req : commitTransactionRequest.tableChanges()) { polarisEventListener.onEvent( new PolarisEvent( PolarisEventType.AFTER_UPDATE_TABLE, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, req.identifier().namespace()) .put(EventAttributes.TABLE_NAME, req.identifier().name()) - .put(EventAttributes.UPDATE_TABLE_REQUEST, req) - .put(EventAttributes.TABLE_METADATA, tableMetadata))); + .put(EventAttributes.UPDATE_TABLE_REQUEST, req))); } return resp; } @@ -826,7 +805,6 @@ public Response reportMetrics( ReportMetricsRequest reportMetricsRequest, RealmContext realmContext, SecurityContext securityContext) { - // Metrics processing is now handled directly in IcebergCatalogAdapter return delegate.reportMetrics( prefix, namespace, table, reportMetricsRequest, realmContext, securityContext); } @@ -845,7 +823,7 @@ public Response sendNotification( new PolarisEvent( PolarisEventType.BEFORE_SEND_NOTIFICATION, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -857,7 +835,7 @@ public Response sendNotification( new PolarisEvent( PolarisEventType.AFTER_SEND_NOTIFICATION, eventMetadataFactory.create(), - new EventAttributeMap() + new AttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java 
b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 8df67c738f..60542df62c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -503,4 +503,44 @@ public MetricsProcessor metricsProcessor( return processors.select(Identifier.Literal.of("noop")).get(); } } + + /** + * Produces the {@link MetricsProcessor} for metrics processing. + * + *

    This producer supports the new configuration path: {@code polaris.metrics.processor.type} + * + *

    The processor is selected based on the configured type using CDI {@link Identifier} + * annotations. Built-in processors include: + * + *

      + *
    • {@code noop} - Discards all metrics (default) + *
    • {@code logging} - Logs metrics to console + *
    • {@code persistence} - Persists to dedicated database tables + *
    + * + *

    Custom processors can be implemented by creating a CDI bean with an {@code @Identifier} + * annotation. + * + *

    Note: Implementations will be added in subsequent PRs. This producer provides the CDI + * infrastructure for the metrics processing system. + */ + @Produces + @ApplicationScoped + public MetricsProcessor metricsProcessor( + MetricsProcessorConfiguration config, @Any Instance processors) { + String type = config.type(); + LOGGER.info("Initializing metrics processor: type={}", type); + + try { + MetricsProcessor processor = processors.select(Identifier.Literal.of(type)).get(); + LOGGER.info("Successfully initialized metrics processor: {}", type); + return processor; + } catch (Exception e) { + LOGGER.error( + "Failed to instantiate metrics processor for type '{}': {}. Falling back to noop.", + type, + e.getMessage()); + return processors.select(Identifier.Literal.of("noop")).get(); + } + } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java index 7638c4742d..436bc529ef 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java @@ -238,8 +238,4 @@ private EventAttributes() {} new AttributeKey<>("detach_policy_request", DetachPolicyRequest.class); public static final AttributeKey GET_APPLICABLE_POLICIES_RESPONSE = new AttributeKey<>("get_applicable_policies_response", GetApplicablePoliciesResponse.class); - - // Metrics reporting attributes - public static final AttributeKey REPORT_METRICS_REQUEST = - new AttributeKey<>("report_metrics_request", ReportMetricsRequest.class); } From 1488c3544656d22d412710f024dcfc4b6c933143 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 09:49:41 -0800 Subject: [PATCH 24/67] Removed internal document --- .polaris-work-notes.md | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 .polaris-work-notes.md diff --git 
a/.polaris-work-notes.md b/.polaris-work-notes.md deleted file mode 100644 index cbd906acff..0000000000 --- a/.polaris-work-notes.md +++ /dev/null @@ -1,18 +0,0 @@ -# Polaris Work Notes - -## Active Branches - -### feat-3337-rest-catalog-metrics-table-merged -- **Remote**: `obelix74/polaris` (origin) -- **PR**: #3385 -- **Description**: Add metrics persistence with dual storage strategy for Iceberg table operations -- **Last Updated**: 2026-01-17 -- **Notes**: - - This branch is tracking `origin/feat-3337-rest-catalog-metrics-table-merged` - - Fixed failing tests by adding `ENABLE_METRICS_EVENT_EMISSION` feature flag - - To push: `git push origin feat-3337-rest-catalog-metrics-table-merged` - -## Git Remotes -- **origin**: https://github.com/obelix74/polaris.git (your fork) -- **upstream**: https://github.com/apache/polaris.git (Apache upstream) - From 17456d6fa05df130daf46120fc0dc09ba6dab660 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 09:50:48 -0800 Subject: [PATCH 25/67] docs: Update CHANGELOG for metrics processor configuration - Remove ENABLE_METRICS_EVENT_EMISSION feature flag entry - Add polaris.metrics.processor.type configuration property --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d545a4839..d5c5c04a3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,7 +97,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti ### New Features -- Added `ENABLE_METRICS_EVENT_EMISSION` feature flag (default: false) to control the emission of `BEFORE_REPORT_METRICS` and `AFTER_REPORT_METRICS` events when the Iceberg REST catalog API's `reportMetrics()` method is called. When enabled, event listeners can receive metrics report data for use cases like audit logging and metrics persistence. Can be configured via `polaris.features."ENABLE_METRICS_EVENT_EMISSION"=true`. 
+- Added `polaris.metrics.processor.type` configuration property to control metrics processing. Supports CDI-based processor selection via `@Identifier` annotations. Default value is `noop`. Processor implementations will be added in subsequent releases. - Added `--no-sts` flag to CLI to support S3-compatible storage systems that do not have Security Token Service available. - Support credential vending for federated catalogs. `ALLOW_FEDERATED_CATALOGS_CREDENTIAL_VENDING` (default: true) was added to toggle this feature. - Enhanced catalog federation with SigV4 authentication support, additional authentication types for credential vending, and location-based access restrictions to block credential vending for remote tables outside allowed location lists. From 95d5e2f009e08ca4665b698a11a8e8611de42a71 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 10:50:40 -0800 Subject: [PATCH 26/67] SpotlessApply --- .../org/apache/polaris/service/events/EventAttributes.java | 1 - .../polaris/service/reporting/MetricsProcessingContext.java | 1 - .../apache/polaris/service/reporting/MetricsProcessor.java | 5 ++--- .../service/reporting/MetricsProcessorConfiguration.java | 1 - 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java index 436bc529ef..138133d62a 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/EventAttributes.java @@ -30,7 +30,6 @@ import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; -import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; import 
org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.ConfigResponse; diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java index 629d43362f..71b50eb6d8 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java @@ -64,4 +64,3 @@ public interface MetricsProcessingContext { /** Timestamp when metrics were received (milliseconds since epoch) */ long timestampMs(); } - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java index 0071df16cc..83ddcced28 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java @@ -26,8 +26,8 @@ * systems, or perform custom processing. * *

    Processors are discovered via CDI using the {@link io.smallrye.common.annotation.Identifier} - * annotation. Custom processors can be implemented and registered by annotating them with {@code - * @ApplicationScoped} and {@code @Identifier("custom-name")}. + * annotation. Custom processors can be implemented and registered by annotating them with + * {@code @ApplicationScoped} and {@code @Identifier("custom-name")}. * *

    Available built-in processors: * @@ -75,4 +75,3 @@ public interface MetricsProcessor { */ void process(MetricsProcessingContext context); } - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java index ba55985058..1e3d6668e3 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java @@ -99,4 +99,3 @@ interface Retention { Duration cleanupInterval(); } } - From 3c17861e193de14f9822128d2862e3c9f7d0d9c6 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 15:32:20 -0800 Subject: [PATCH 27/67] Review comments --- CHANGELOG.md | 1 - .../service/config/ServiceProducers.java | 42 -------- .../reporting/DefaultMetricsReporter.java | 1 - .../reporting/MetricsProcessingContext.java | 66 ------------ .../service/reporting/MetricsProcessor.java | 77 ------------- .../MetricsProcessorConfiguration.java | 101 ------------------ .../reporting/DefaultMetricsReporterTest.java | 1 - 7 files changed, 289 deletions(-) delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d5c5c04a3b..34ef7108a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,7 +97,6 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti ### New Features -- Added `polaris.metrics.processor.type` configuration property to control metrics processing. 
Supports CDI-based processor selection via `@Identifier` annotations. Default value is `noop`. Processor implementations will be added in subsequent releases. - Added `--no-sts` flag to CLI to support S3-compatible storage systems that do not have Security Token Service available. - Support credential vending for federated catalogs. `ALLOW_FEDERATED_CATALOGS_CREDENTIAL_VENDING` (default: true) was added to toggle this feature. - Enhanced catalog federation with SigV4 authentication support, additional authentication types for credential vending, and location-based access restrictions to block credential vending for remote tables outside allowed location lists. diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 60542df62c..2647a350e6 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -81,8 +81,6 @@ import org.apache.polaris.service.ratelimiter.RateLimiterFilterConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketConfiguration; import org.apache.polaris.service.ratelimiter.TokenBucketFactory; -import org.apache.polaris.service.reporting.MetricsProcessor; -import org.apache.polaris.service.reporting.MetricsProcessorConfiguration; import org.apache.polaris.service.reporting.MetricsReportingConfiguration; import org.apache.polaris.service.reporting.PolarisMetricsReporter; import org.apache.polaris.service.secrets.SecretsManagerConfiguration; @@ -503,44 +501,4 @@ public MetricsProcessor metricsProcessor( return processors.select(Identifier.Literal.of("noop")).get(); } } - - /** - * Produces the {@link MetricsProcessor} for metrics processing. - * - *

    This producer supports the new configuration path: {@code polaris.metrics.processor.type} - * - *

    The processor is selected based on the configured type using CDI {@link Identifier} - * annotations. Built-in processors include: - * - *

      - *
    • {@code noop} - Discards all metrics (default) - *
    • {@code logging} - Logs metrics to console - *
    • {@code persistence} - Persists to dedicated database tables - *
    - * - *

    Custom processors can be implemented by creating a CDI bean with an {@code @Identifier} - * annotation. - * - *

    Note: Implementations will be added in subsequent PRs. This producer provides the CDI - * infrastructure for the metrics processing system. - */ - @Produces - @ApplicationScoped - public MetricsProcessor metricsProcessor( - MetricsProcessorConfiguration config, @Any Instance processors) { - String type = config.type(); - LOGGER.info("Initializing metrics processor: type={}", type); - - try { - MetricsProcessor processor = processors.select(Identifier.Literal.of(type)).get(); - LOGGER.info("Successfully initialized metrics processor: {}", type); - return processor; - } catch (Exception e) { - LOGGER.error( - "Failed to instantiate metrics processor for type '{}': {}. Falling back to noop.", - type, - e.getMessage()); - return processors.select(Identifier.Literal.of("noop")).get(); - } - } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java index d08e9b0b46..eaff0219b4 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/DefaultMetricsReporter.java @@ -74,4 +74,3 @@ public void reportMetric( reportConsumer.accept(catalogName, table, metricsReport, receivedTimestamp); } } - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java deleted file mode 100644 index 71b50eb6d8..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessingContext.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import java.util.Optional; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.MetricsReport; -import org.immutables.value.Value; - -/** - * Context information for metrics processing, providing access to request metadata, security - * context, and tracing information. - * - *

    This immutable context object contains all the information needed to process and persist - * Iceberg metrics reports, including catalog and table identifiers, the metrics report itself, and - * associated metadata like principal name, request ID, and OpenTelemetry trace context. - */ -@Value.Immutable -public interface MetricsProcessingContext { - - /** The catalog name where the metrics originated */ - String catalogName(); - - /** The table identifier */ - TableIdentifier tableIdentifier(); - - /** The Iceberg metrics report (ScanReport or CommitReport) */ - MetricsReport metricsReport(); - - /** The realm ID */ - String realmId(); - - /** The catalog ID (internal entity ID) */ - Optional catalogId(); - - /** The principal name who submitted the metrics */ - Optional principalName(); - - /** The request ID for correlation */ - Optional requestId(); - - /** OpenTelemetry trace ID */ - Optional otelTraceId(); - - /** OpenTelemetry span ID */ - Optional otelSpanId(); - - /** Timestamp when metrics were received (milliseconds since epoch) */ - long timestampMs(); -} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java deleted file mode 100644 index 83ddcced28..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessor.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -/** - * Interface for processing Iceberg metrics reports in Polaris. - * - *

    This interface provides a pluggable mechanism for handling metrics reports from Iceberg table - * operations. Implementations can persist metrics to various backends, forward them to external - * systems, or perform custom processing. - * - *

    Processors are discovered via CDI using the {@link io.smallrye.common.annotation.Identifier} - * annotation. Custom processors can be implemented and registered by annotating them with - * {@code @ApplicationScoped} and {@code @Identifier("custom-name")}. - * - *

    Available built-in processors: - * - *

      - *
    • {@code noop} - Discards all metrics (default) - *
    • {@code logging} - Logs metrics to console for debugging - *
    • {@code persistence} - Persists to dedicated metrics tables - *
    - * - *

    Example configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: persistence
    - * 
    - * - *

    Custom implementations should be annotated with: - * - *

    - * {@literal @}ApplicationScoped
    - * {@literal @}Identifier("custom-processor")
    - * public class CustomMetricsProcessor implements MetricsProcessor {
    - *   {@literal @}Override
    - *   public void process(MetricsProcessingContext context) {
    - *     // implementation
    - *   }
    - * }
    - * 
    - * - * @see MetricsProcessingContext - * @see MetricsProcessorConfiguration - */ -public interface MetricsProcessor { - - /** - * Process a metrics report with full context information. - * - *

    Implementations should handle exceptions gracefully and not throw exceptions that would - * disrupt the metrics reporting flow. Errors should be logged and metrics about processing - * failures should be emitted. - * - * @param context the complete context for metrics processing - */ - void process(MetricsProcessingContext context); -} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java deleted file mode 100644 index 1e3d6668e3..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsProcessorConfiguration.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import io.smallrye.config.ConfigMapping; -import io.smallrye.config.WithDefault; -import java.time.Duration; -import java.util.Optional; - -/** - * Configuration for metrics processing in Polaris. - * - *

    This configuration controls how Iceberg metrics reports are processed and persisted. The - * processor type determines which implementation is used. - * - *

    Example configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: persistence
    - *       retention:
    - *         enabled: true
    - *         retention-period: P30D
    - *         cleanup-interval: PT6H
    - * 
    - */ -@ConfigMapping(prefix = "polaris.metrics.processor") -public interface MetricsProcessorConfiguration { - - /** - * The type of metrics processor to use. - * - *

    Supported built-in values: - * - *

      - *
    • {@code noop} - No processing, discards all metrics (default) - *
    • {@code logging} - Log metrics to console for debugging - *
    • {@code persistence} - Persist to dedicated metrics tables - *
    - * - *

    Custom processor types can be specified if a corresponding {@link MetricsProcessor} - * implementation is available with a matching {@link io.smallrye.common.annotation.Identifier}. - * - * @return the processor type identifier - */ - @WithDefault("noop") - String type(); - - /** - * Retention policy configuration for persisted metrics reports. - * - * @return the retention configuration - */ - Optional retention(); - - /** Retention policy configuration for metrics reports. */ - interface Retention { - - /** - * Whether automatic cleanup of old metrics reports is enabled. - * - * @return true if cleanup is enabled - */ - @WithDefault("false") - boolean enabled(); - - /** - * How long to retain metrics reports before they are eligible for deletion. - * - * @return the retention period (default: 30 days) - */ - @WithDefault("P30D") - Duration retentionPeriod(); - - /** - * How often to run the cleanup job. - * - * @return the cleanup interval (default: 6 hours) - */ - @WithDefault("PT6H") - Duration cleanupInterval(); - } -} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java index a8b66a677b..8762c3ed74 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/DefaultMetricsReporterTest.java @@ -44,4 +44,3 @@ void testLogging() { verify(mockConsumer).accept(warehouse, table, metricsReport, receivedTimestamp); } } - From e685f4006ab697df5a7b3311b6021a07100365d4 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 15:52:14 -0800 Subject: [PATCH 28/67] Review comments --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ef7108a6..74022d7fc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,6 @@ request 
adding CHANGELOG notes for breaking (!) changes and possibly other secti ### Breaking changes -- The (Before/After)CommitViewEvent has been removed. - The (Before/After)CommitTableEvent has been removed. - The `PolarisMetricsReporter.reportMetric()` method signature has been extended to include a `receivedTimestamp` parameter of type `java.time.Instant`. From bf934a0759acbe6056e730e22a79bb00a19d3a68 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Mon, 19 Jan 2026 16:36:31 -0800 Subject: [PATCH 29/67] Added a service producer for SpanContext --- .../service/config/ServiceProducers.java | 70 +------------------ 1 file changed, 3 insertions(+), 67 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 2647a350e6..121eb382c1 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -31,8 +31,6 @@ import jakarta.enterprise.inject.Produces; import jakarta.inject.Singleton; import java.time.Clock; -import java.util.ArrayList; -import java.util.List; import java.util.stream.Collectors; import org.apache.polaris.core.PolarisCallContext; import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; @@ -275,7 +273,7 @@ public StsClientsPool stsClientsPool( */ public void maybeBootstrap( @Observes Startup event, - MetaStoreManagerFactory factory, + Bootstrapper bootstrapper, PersistenceConfiguration config, RealmContextConfiguration realmContextConfiguration) { var rootCredentialsSet = RootCredentialsSet.fromEnvironment(); @@ -289,7 +287,7 @@ public void maybeBootstrap( RootCredentialsSet.ENVIRONMENT_VARIABLE, RootCredentialsSet.SYSTEM_PROPERTY); - var result = factory.bootstrapRealms(realmIds, rootCredentialsSet); + var result = bootstrapper.bootstrapRealms(realmIds, 
rootCredentialsSet); result.forEach( (realm, secrets) -> { @@ -433,72 +431,10 @@ public void closeTaskExecutor(@Disposes @Identifier("task-executor") ManagedExec executor.close(); } - /** - * Produces the legacy {@link PolarisMetricsReporter} for backward compatibility. - * - *

    This producer supports the old configuration path: {@code - * polaris.iceberg-metrics.reporting.type} - * - *

    The reporter is selected based on the configured type using CDI {@link Identifier} - * annotations. - */ @Produces @ApplicationScoped public PolarisMetricsReporter metricsReporter( MetricsReportingConfiguration config, @Any Instance reporters) { - String type = config.type(); - LOGGER.info("Initializing legacy metrics reporter: type={}", type); - - try { - PolarisMetricsReporter reporter = reporters.select(Identifier.Literal.of(type)).get(); - LOGGER.info("Successfully initialized legacy metrics reporter: {}", type); - return reporter; - } catch (Exception e) { - LOGGER.error( - "Failed to instantiate metrics reporter for type '{}': {}. Falling back to default.", - type, - e.getMessage()); - return reporters.select(Identifier.Literal.of("default")).get(); - } - } - - /** - * Produces the new {@link MetricsProcessor} for metrics processing. - * - *

    This producer supports the new configuration path: {@code polaris.metrics.processor.type} - * - *

    The processor is selected based on the configured type using CDI {@link Identifier} - * annotations. If the new configuration is not specified, it falls back to using the legacy - * {@link PolarisMetricsReporter} via the "legacy" processor. - */ - @Produces - @ApplicationScoped - public MetricsProcessor metricsProcessor( - MetricsProcessorConfiguration processorConfig, - MetricsReportingConfiguration reporterConfig, - @Any Instance processors) { - String type = processorConfig.type(); - LOGGER.info("Initializing metrics processor: type={}", type); - - // If using default "noop" processor but old config is set, use legacy processor - if ("noop".equals(type) && !"default".equals(reporterConfig.type())) { - LOGGER.info( - "New processor config is 'noop' but legacy reporter config is '{}'. " - + "Using legacy processor for backward compatibility.", - reporterConfig.type()); - type = "legacy"; - } - - try { - MetricsProcessor processor = processors.select(Identifier.Literal.of(type)).get(); - LOGGER.info("Successfully initialized metrics processor: {}", type); - return processor; - } catch (Exception e) { - LOGGER.error( - "Failed to instantiate metrics processor for type '{}': {}. Falling back to noop.", - type, - e.getMessage()); - return processors.select(Identifier.Literal.of("noop")).get(); - } + return reporters.select(Identifier.Literal.of(config.type())).get(); } } From 266fa7e0758b9fe9110bb7d237f7e49966938be3 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Tue, 20 Jan 2026 12:41:58 -0800 Subject: [PATCH 30/67] Review comments. 
--- .../events/listeners/TestPolarisEventListener.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java index a0973750b5..11d8a4dae8 100644 --- a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java @@ -44,16 +44,4 @@ public PolarisEvent getLatest(PolarisEventType type) { return latest; } - /** - * Returns the latest event of the specified type, or null if no such event has been recorded. - * This is useful for tests that need to verify no event was emitted. - */ - public PolarisEvent getLatestOrNull(PolarisEventType type) { - return latestEvents.get(type); - } - - /** Returns true if an event of the specified type has been recorded. 
*/ - public boolean hasEvent(PolarisEventType type) { - return latestEvents.containsKey(type); - } } From da125d89bbefe6535ec18d204b3838bc0e6fcc1d Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 21 Jan 2026 08:47:01 -0800 Subject: [PATCH 31/67] refactor: Remove PR2 files (MetricsProcessor framework) from PR1 branch This commit removes the MetricsProcessor framework files that belong to PR2: - MetricsProcessor interface and implementations (Noop, Logging, Legacy, Persistence) - MetricsProcessorConfiguration - MetricsProcessingContext - MetricsReportCleanupService - MetricsReporterToProcessorAdapter - telemetry.md documentation PR1 now contains only the database schema and core persistence layer: - Schema files (h2/postgres schema-v4.sql) - Model classes (ModelScanMetricsReport, ModelCommitMetricsReport) - Converters (MetricsReportConverter, ModelScanMetricsReportConverter, ModelCommitMetricsReportConverter) - Persistence implementation (JdbcBasePersistenceImpl, QueryGenerator) - Tests for the persistence layer --- .../src/main/resources/application.properties | 5 +- runtime/service/build.gradle.kts | 3 +- .../reporting/LegacyMetricsProcessor.java | 78 ---- .../reporting/LoggingMetricsProcessor.java | 81 ---- .../MetricsReportCleanupService.java | 198 --------- .../MetricsReporterToProcessorAdapter.java | 60 --- .../reporting/NoopMetricsProcessor.java | 66 --- .../PersistenceMetricsProcessor.java | 179 --------- site/content/in-dev/unreleased/telemetry.md | 375 ------------------ 9 files changed, 2 insertions(+), 1043 deletions(-) delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java delete mode 100644 
runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java delete mode 100644 site/content/in-dev/unreleased/telemetry.md diff --git a/runtime/defaults/src/main/resources/application.properties b/runtime/defaults/src/main/resources/application.properties index 60a658e028..11dd4449f4 100644 --- a/runtime/defaults/src/main/resources/application.properties +++ b/runtime/defaults/src/main/resources/application.properties @@ -266,11 +266,8 @@ polaris.oidc.principal-roles-mapper.type=default # Polaris Credential Manager Config polaris.credential-manager.type=default -# Configuration for metrics processing -# Legacy configuration (backward compatible with main branch) +# Configuration for the behaviour of the metrics endpoint polaris.iceberg-metrics.reporting.type=default -# New configuration (optional, defaults to noop) -polaris.metrics.processor.type=noop # Set to INFO if you want to see iceberg metric reports logged quarkus.log.category."org.apache.polaris.service.reporting".level=OFF diff --git a/runtime/service/build.gradle.kts b/runtime/service/build.gradle.kts index 134ea0e2db..11d1b7a481 100644 --- a/runtime/service/build.gradle.kts +++ b/runtime/service/build.gradle.kts @@ -31,7 +31,7 @@ dependencies { implementation(project(":polaris-api-iceberg-service")) implementation(project(":polaris-api-catalog-service")) - implementation(project(":polaris-relational-jdbc")) + runtimeOnly(project(":polaris-relational-jdbc")) implementation(project(":polaris-runtime-defaults")) implementation(project(":polaris-runtime-common")) @@ -62,7 +62,6 @@ dependencies { implementation("io.quarkus:quarkus-micrometer-registry-prometheus") implementation("io.quarkus:quarkus-oidc") 
implementation("io.quarkus:quarkus-opentelemetry") - implementation("io.quarkus:quarkus-scheduler") implementation("io.quarkus:quarkus-security") implementation("io.quarkus:quarkus-smallrye-context-propagation") implementation("io.quarkus:quarkus-smallrye-fault-tolerance") diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java deleted file mode 100644 index 3e97aedea8..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LegacyMetricsProcessor.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link MetricsProcessor} that delegates to the legacy {@link PolarisMetricsReporter} for - * backward compatibility. - * - *

    This processor is used when the old configuration path {@code - * polaris.iceberg-metrics.reporting.type} is specified. It wraps the configured {@link - * PolarisMetricsReporter} and adapts it to the new {@link MetricsProcessor} interface. - * - *

    This allows existing configurations to continue working without changes during the migration - * to the new metrics processing system. - * - *

    To use this processor with the new configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: legacy
    - * 
    - * - *

    Or continue using the old configuration (automatically mapped to this processor): - * - *

    - * polaris:
    - *   iceberg-metrics:
    - *     reporting:
    - *       type: default
    - * 
    - */ -@ApplicationScoped -@Identifier("legacy") -public class LegacyMetricsProcessor implements MetricsProcessor { - - private static final Logger LOGGER = LoggerFactory.getLogger(LegacyMetricsProcessor.class); - - private final PolarisMetricsReporter reporter; - - @Inject - public LegacyMetricsProcessor(PolarisMetricsReporter reporter) { - this.reporter = reporter; - LOGGER.info( - "LegacyMetricsProcessor initialized with reporter: {}", reporter.getClass().getName()); - } - - @Override - public void process(MetricsProcessingContext context) { - // Delegate to the legacy reporter with basic parameters - reporter.reportMetric( - context.catalogName(), context.tableIdentifier(), context.metricsReport()); - } -} - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java deleted file mode 100644 index 584ef921ca..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/LoggingMetricsProcessor.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link MetricsProcessor} implementation that logs metrics to the console. - * - *

    This processor logs all metrics reports at INFO level, including the full context - * information such as realm ID, principal, request ID, and OpenTelemetry trace context. - * - *

    This processor is useful for: - * - *

      - *
    • Development and debugging - *
    • Understanding what metrics are being reported - *
    • Troubleshooting metrics processing issues - *
    - * - *

    Configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: logging
    - * 
    - * - *

    To see the logs, ensure the logging level is set appropriately: - * - *

    - * quarkus.log.category."org.apache.polaris.service.reporting".level=INFO
    - * 
    - */ -@ApplicationScoped -@Identifier("logging") -public class LoggingMetricsProcessor implements MetricsProcessor { - - private static final Logger LOGGER = LoggerFactory.getLogger(LoggingMetricsProcessor.class); - - public LoggingMetricsProcessor() { - LOGGER.info("LoggingMetricsProcessor initialized - metrics will be logged to console"); - } - - @Override - public void process(MetricsProcessingContext context) { - LOGGER.info( - "Metrics Report: catalog={}, table={}, realm={}, principal={}, requestId={}, " - + "traceId={}, spanId={}, timestamp={}, report={}", - context.catalogName(), - context.tableIdentifier(), - context.realmId(), - context.principalName().orElse("unknown"), - context.requestId().orElse("none"), - context.otelTraceId().orElse("none"), - context.otelSpanId().orElse("none"), - context.timestampMs(), - context.metricsReport()); - } -} - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java deleted file mode 100644 index 8ddbcbdc1f..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportCleanupService.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import io.quarkus.runtime.Startup; -import io.quarkus.scheduler.Scheduled; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; -import java.time.Duration; -import java.time.Instant; -import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; -import org.apache.polaris.service.context.RealmContextConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Scheduled service that cleans up old metrics reports based on the configured retention policy. - * - *

    This service runs periodically and deletes metrics reports that are older than the configured - * retention period. It only operates when the persistence backend is relational-jdbc. - * - *

    Configuration example: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: persistence
    - *       retention:
    - *         enabled: true
    - *         retention-period: P30D  # 30 days
    - *         cleanup-interval: PT6H  # every 6 hours
    - * 
    - */ -@ApplicationScoped -@Startup -public class MetricsReportCleanupService { - - private static final Logger LOGGER = LoggerFactory.getLogger(MetricsReportCleanupService.class); - - private final MetricsProcessorConfiguration config; - private final MetaStoreManagerFactory metaStoreManagerFactory; - private final RealmContextConfiguration realmContextConfiguration; - private final AtomicBoolean running = new AtomicBoolean(false); - - @Inject - public MetricsReportCleanupService( - MetricsProcessorConfiguration config, - MetaStoreManagerFactory metaStoreManagerFactory, - RealmContextConfiguration realmContextConfiguration) { - this.config = config; - this.metaStoreManagerFactory = metaStoreManagerFactory; - this.realmContextConfiguration = realmContextConfiguration; - - if (config.retention().isPresent() && config.retention().get().enabled()) { - LOGGER.info( - "Metrics report cleanup enabled with retention period: {}, cleanup interval: {}", - config.retention().get().retentionPeriod(), - config.retention().get().cleanupInterval()); - } else { - LOGGER.debug("Metrics report cleanup is disabled"); - } - } - - /** - * Scheduled cleanup job that runs at the configured interval. The actual interval is configured - * via the retention.cleanup-interval property. 
- */ - @Scheduled(every = "${polaris.metrics.processor.retention.cleanup-interval:6h}") - public void cleanupOldMetricsReports() { - if (config.retention().isEmpty() || !config.retention().get().enabled()) { - LOGGER.trace("Metrics cleanup is disabled, skipping"); - return; - } - - // Prevent concurrent runs - if (!running.compareAndSet(false, true)) { - LOGGER.debug("Cleanup already in progress, skipping this run"); - return; - } - - try { - performCleanup(); - } finally { - running.set(false); - } - } - - private void performCleanup() { - Duration retentionPeriod = config.retention().get().retentionPeriod(); - long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli(); - List realmIds = realmContextConfiguration.realms(); - - LOGGER.info( - "Starting metrics report cleanup across {} realm(s). Deleting reports older than {} (cutoff: {})", - realmIds.size(), - retentionPeriod, - Instant.ofEpochMilli(cutoffTimestamp)); - - int totalDeleted = 0; - for (String realmId : realmIds) { - try { - int deletedCount = cleanupForRealm(realmId, cutoffTimestamp); - if (deletedCount > 0) { - LOGGER.info("Deleted {} old metrics reports from realm '{}'", deletedCount, realmId); - totalDeleted += deletedCount; - } else { - LOGGER.debug("No old metrics reports to delete in realm '{}'", realmId); - } - } catch (Exception e) { - LOGGER.error( - "Failed to cleanup old metrics reports for realm '{}': {}", realmId, e.getMessage(), e); - } - } - - if (totalDeleted > 0) { - LOGGER.info("Total deleted metrics reports across all realms: {}", totalDeleted); - } - } - - private int cleanupForRealm(String realmId, long cutoffTimestamp) { - RealmContext realmContext = () -> realmId; - BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); - - if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { - LOGGER.debug( - "Metrics cleanup is only supported with relational-jdbc backend. 
" - + "Current backend: {} for realm '{}'", - session.getClass().getSimpleName(), - realmId); - return 0; - } - - return jdbcPersistence.deleteAllMetricsReportsOlderThan(cutoffTimestamp); - } - - /** - * Manually trigger a cleanup across all realms. This can be called from an admin endpoint or for - * testing. - * - * @return the total number of reports deleted across all realms, or -1 if cleanup is disabled or - * failed - */ - public int triggerCleanup() { - if (config.retention().isEmpty() || !config.retention().get().enabled()) { - LOGGER.warn("Cannot trigger cleanup: retention is disabled"); - return -1; - } - - if (!running.compareAndSet(false, true)) { - LOGGER.warn("Cannot trigger cleanup: cleanup already in progress"); - return -1; - } - - try { - Duration retentionPeriod = config.retention().get().retentionPeriod(); - long cutoffTimestamp = Instant.now().minus(retentionPeriod).toEpochMilli(); - List realmIds = realmContextConfiguration.realms(); - - int totalDeleted = 0; - for (String realmId : realmIds) { - try { - int deletedCount = cleanupForRealm(realmId, cutoffTimestamp); - totalDeleted += deletedCount; - } catch (Exception e) { - LOGGER.error("Failed to cleanup metrics for realm '{}': {}", realmId, e.getMessage(), e); - } - } - return totalDeleted; - } catch (Exception e) { - LOGGER.error("Failed to trigger cleanup: {}", e.getMessage(), e); - return -1; - } finally { - running.set(false); - } - } -} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java deleted file mode 100644 index b674dd709c..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReporterToProcessorAdapter.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.MetricsReport; - -/** - * Adapter that bridges the legacy {@link PolarisMetricsReporter} interface to the new {@link - * MetricsProcessor} interface. - * - *

    This adapter allows existing {@link PolarisMetricsReporter} implementations to work with the - * new {@link MetricsProcessor} system, providing backward compatibility during the migration - * period. - * - *

    The adapter converts the simple {@link PolarisMetricsReporter#reportMetric(String, - * TableIdentifier, MetricsReport)} call into a full {@link MetricsProcessingContext} by extracting - * available information from the current request context. - */ -public class MetricsReporterToProcessorAdapter implements MetricsProcessor { - - private final PolarisMetricsReporter reporter; - - public MetricsReporterToProcessorAdapter(PolarisMetricsReporter reporter) { - this.reporter = reporter; - } - - @Override - public void process(MetricsProcessingContext context) { - // Delegate to the legacy reporter interface with just the basic parameters - reporter.reportMetric( - context.catalogName(), context.tableIdentifier(), context.metricsReport()); - } - - /** - * Get the underlying legacy reporter. - * - * @return the wrapped PolarisMetricsReporter - */ - public PolarisMetricsReporter getReporter() { - return reporter; - } -} - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java deleted file mode 100644 index d73475a28b..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/NoopMetricsProcessor.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.service.reporting; - -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A no-op implementation of {@link MetricsProcessor} that discards all metrics. - * - *

    This is the default processor when no specific type is configured. It performs no processing - * and simply discards all metrics reports. - * - *

    This processor is useful when: - * - *

      - *
    • Metrics processing is not needed - *
    • You want to disable metrics processing temporarily - *
    • You're testing and don't want metrics overhead - *
    - * - *

    Configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: noop
    - * 
    - */ -@ApplicationScoped -@Identifier("noop") -public class NoopMetricsProcessor implements MetricsProcessor { - - private static final Logger LOGGER = LoggerFactory.getLogger(NoopMetricsProcessor.class); - - public NoopMetricsProcessor() { - LOGGER.debug("NoopMetricsProcessor initialized - all metrics will be discarded"); - } - - @Override - public void process(MetricsProcessingContext context) { - // Intentionally do nothing - discard all metrics - LOGGER.trace( - "Discarding metrics for {}.{}", context.catalogName(), context.tableIdentifier()); - } -} - diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java deleted file mode 100644 index 21c18d5cde..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistenceMetricsProcessor.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.reporting; - -import io.smallrye.common.annotation.Identifier; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.polaris.core.context.RealmContext; -import org.apache.polaris.core.persistence.BasePersistence; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; -import org.apache.polaris.persistence.relational.jdbc.JdbcBasePersistenceImpl; -import org.apache.polaris.persistence.relational.jdbc.models.MetricsReportConverter; -import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; -import org.apache.polaris.service.context.RealmContextConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link MetricsProcessor} that persists metrics to dedicated database tables. - * - *

    This processor stores Iceberg metrics reports in dedicated tables: - * - *

      - *
    • {@code scan_metrics_report} - For ScanReport metrics - *
    • {@code commit_metrics_report} - For CommitReport metrics - *
    - * - *

    The processor includes full context information such as realm ID, catalog ID, principal name, - * request ID, and OpenTelemetry trace context for correlation and analysis. - * - *

    Requirements: - * - *

      - *
    • Requires JDBC-based persistence backend ({@code polaris.persistence.type=relational-jdbc}) - *
    • Database schema must include metrics tables (created via Flyway migrations) - *
    - * - *

    Configuration: - * - *

    - * polaris:
    - *   metrics:
    - *     processor:
    - *       type: persistence
    - *       retention:
    - *         enabled: true
    - *         retention-period: P30D
    - *         cleanup-interval: PT6H
    - * 
    - */ -@ApplicationScoped -@Identifier("persistence") -public class PersistenceMetricsProcessor implements MetricsProcessor { - - private static final Logger LOGGER = LoggerFactory.getLogger(PersistenceMetricsProcessor.class); - - private final MetaStoreManagerFactory metaStoreManagerFactory; - private final RealmContextConfiguration realmContextConfiguration; - - @Inject - public PersistenceMetricsProcessor( - MetaStoreManagerFactory metaStoreManagerFactory, - RealmContextConfiguration realmContextConfiguration) { - this.metaStoreManagerFactory = metaStoreManagerFactory; - this.realmContextConfiguration = realmContextConfiguration; - LOGGER.info("PersistenceMetricsProcessor initialized - metrics will be persisted to database"); - } - - @Override - public void process(MetricsProcessingContext context) { - try { - // Get the persistence session for the realm - String realmId = context.realmId(); - RealmContext realmContext = () -> realmId; - BasePersistence session = metaStoreManagerFactory.getOrCreateSession(realmContext); - - // Only JDBC persistence supports metrics tables - if (!(session instanceof JdbcBasePersistenceImpl jdbcPersistence)) { - LOGGER.warn( - "Persistence metrics processor requires JDBC persistence backend. " - + "Current backend: {}. Metrics will not be persisted.", - session.getClass().getSimpleName()); - return; - } - - // Persist based on report type - if (context.metricsReport() instanceof ScanReport scanReport) { - persistScanReport(jdbcPersistence, context, scanReport); - } else if (context.metricsReport() instanceof CommitReport commitReport) { - persistCommitReport(jdbcPersistence, context, commitReport); - } else { - LOGGER.warn( - "Unknown metrics report type: {}. 
Metrics will not be persisted.", - context.metricsReport().getClass().getName()); - } - } catch (Exception e) { - LOGGER.error( - "Failed to persist metrics for {}.{}: {}", - context.catalogName(), - context.tableIdentifier(), - e.getMessage(), - e); - } - } - - private void persistScanReport( - JdbcBasePersistenceImpl jdbcPersistence, - MetricsProcessingContext context, - ScanReport scanReport) { - try { - String namespace = context.tableIdentifier().namespace().toString(); - String catalogId = context.catalogId().map(String::valueOf).orElse(null); - - ModelScanMetricsReport modelReport = - MetricsReportConverter.fromScanReport( - scanReport, - context.realmId(), - catalogId, - context.catalogName(), - namespace, - context.principalName().orElse(null), - context.requestId().orElse(null), - context.otelTraceId().orElse(null), - context.otelSpanId().orElse(null)); - - jdbcPersistence.writeScanMetricsReport(modelReport); - LOGGER.debug("Persisted scan metrics for {}.{}", context.catalogName(), context.tableIdentifier()); - } catch (Exception e) { - LOGGER.error("Failed to persist scan metrics: {}", e.getMessage(), e); - } - } - - private void persistCommitReport( - JdbcBasePersistenceImpl jdbcPersistence, - MetricsProcessingContext context, - CommitReport commitReport) { - try { - String namespace = context.tableIdentifier().namespace().toString(); - String catalogId = context.catalogId().map(String::valueOf).orElse(null); - - ModelCommitMetricsReport modelReport = - MetricsReportConverter.fromCommitReport( - commitReport, - context.realmId(), - catalogId, - context.catalogName(), - namespace, - context.principalName().orElse(null), - context.requestId().orElse(null), - context.otelTraceId().orElse(null), - context.otelSpanId().orElse(null)); - - jdbcPersistence.writeCommitMetricsReport(modelReport); - LOGGER.debug("Persisted commit metrics for {}.{}", context.catalogName(), context.tableIdentifier()); - } catch (Exception e) { - LOGGER.error("Failed to persist 
commit metrics: {}", e.getMessage(), e); - } - } -} - diff --git a/site/content/in-dev/unreleased/telemetry.md b/site/content/in-dev/unreleased/telemetry.md deleted file mode 100644 index 7edcc2d603..0000000000 --- a/site/content/in-dev/unreleased/telemetry.md +++ /dev/null @@ -1,375 +0,0 @@ ---- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -title: Telemetry -type: docs -weight: 450 ---- - -## Metrics - -Metrics are published using [Micrometer]; they are available from Polaris's management interface -(port 8182 by default) under the path `/q/metrics`. For example, if the server is running on -localhost, the metrics can be accessed via http://localhost:8182/q/metrics. - -[Micrometer]: https://quarkus.io/guides/telemetry-micrometer - -Metrics can be scraped by Prometheus or any compatible metrics scraping server. See: -[Prometheus](https://prometheus.io) for more information. - -Additional tags can be added to the metrics by setting the `polaris.metrics.tags.*` property. Each -tag is a key-value pair, where the key is the tag name and the value is the tag value. For example, -to add a tag `environment=prod` to all metrics, set `polaris.metrics.tags.environment=prod`. 
Many -tags can be added, such as below: - -```properties -polaris.metrics.tags.service=polaris -polaris.metrics.tags.environment=prod -polaris.metrics.tags.region=us-west-2 -``` - -Note that by default Polaris adds one tag: `application=Polaris`. You can override this tag by -setting the `polaris.metrics.tags.application=` property. - -### Realm ID Tag - -Polaris can add the realm ID as a tag to all API and HTTP request metrics. This is disabled by -default to prevent high cardinality issues, but can be enabled by setting the following properties: - -```properties -polaris.metrics.realm-id-tag.enable-in-api-metrics=true -polaris.metrics.realm-id-tag.enable-in-http-metrics=true -``` - -You should be particularly careful when enabling the realm ID tag in HTTP request metrics, as these -metrics typically have a much higher cardinality than API request metrics. - -In order to prevent the number of tags from growing indefinitely and causing performance issues or -crashing the server, the number of unique realm IDs in HTTP request metrics is limited to 100 by -default. If the number of unique realm IDs exceeds this value, a warning will be logged and no more -HTTP request metrics will be recorded. This threshold can be changed by setting the -`polaris.metrics.realm-id-tag.http-metrics-max-cardinality` property. - -## Traces - -Traces are published using [OpenTelemetry]. - -[OpenTelemetry]: https://quarkus.io/guides/opentelemetry-tracing - -By default OpenTelemetry is disabled in Polaris, because there is no reasonable default -for the collector endpoint for all cases. - -To enable OpenTelemetry and publish traces for Polaris set `quarkus.otel.sdk.disabled=false` -and configure a valid collector endpoint URL with `http://` or `https://` as the server property -`quarkus.otel.exporter.otlp.traces.endpoint`. 
- -_If these properties are not set, the server will not publish traces._ - -The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port -(by default 4317), e.g. "http://otlp-collector:4317". - -By default, Polaris adds a few attributes to the [OpenTelemetry Resource] to identify the server, -and notably: - -- `service.name`: set to `Apache Polaris Server (incubating)`; -- `service.version`: set to the Polaris version. - -[OpenTelemetry Resource]: https://opentelemetry.io/docs/languages/js/resources/ - -You can override the default resource attributes or add additional ones by setting the -`quarkus.otel.resource.attributes` property. - -This property expects a comma-separated list of key-value pairs, where the key is the attribute name -and the value is the attribute value. For example, to change the service name to `Polaris` and add -an attribute `deployment.environment=dev`, set the following property: - -```properties -quarkus.otel.resource.attributes=service.name=Polaris,deployment.environment=dev -``` - -The alternative syntax below can also be used: - -```properties -quarkus.otel.resource.attributes[0]=service.name=Polaris -quarkus.otel.resource.attributes[1]=deployment.environment=dev -``` - -Finally, two additional span attributes are added to all request parent spans: - -- `polaris.request.id`: The unique identifier of the request, if set by the caller through the - `X-Request-ID` header. -- `polaris.realm`: The unique identifier of the realm. Always set (unless the request failed because - of a realm resolution error). - -### Troubleshooting Traces - -If the server is unable to publish traces, check first for a log warning message like the following: - -``` -SEVERE [io.ope.exp.int.grp.OkHttpGrpcExporter] (OkHttp http://localhost:4317/...) Failed to export spans. -The request could not be executed. 
Full error message: Failed to connect to localhost/0:0:0:0:0:0:0:1:4317 -``` - -This means that the server is unable to connect to the collector. Check that the collector is -running and that the URL is correct. - -## Logging - -Polaris relies on [Quarkus](https://quarkus.io/guides/logging) for logging. - -By default, logs are written to the console and to a file located in the `./logs` directory. The log -file is rotated daily and compressed. The maximum size of the log file is 10MB, and the maximum -number of backup files is 14. - -JSON logging can be enabled by setting the `quarkus.log.console.json.enabled` and `quarkus.log.file.json.enabled` -properties to `true`. By default, JSON logging is disabled. - -The log level can be set for the entire application or for specific packages. The default log level -is `INFO`. To set the log level for the entire application, use the `quarkus.log.level` property. - -To set the log level for a specific package, use the `quarkus.log.category."package-name".level`, -where `package-name` is the name of the package. For example, the package `io.smallrye.config` has a -useful logger to help debugging configuration issues; but it needs to be set to the `DEBUG` level. -This can be done by setting the following property: - -```properties -quarkus.log.category."io.smallrye.config".level=DEBUG -``` - -The log message format for both console and file output is highly configurable. The default format -is: - -``` -%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n -``` - -Refer to the [Logging format](https://quarkus.io/guides/logging#logging-format) guide for more -information on placeholders and how to customize the log message format. - -### MDC Logging - -Polaris uses Mapped Diagnostic Context (MDC) to enrich log messages with additional context. 
The -following MDC keys are available: - -- `requestId`: The unique identifier of the request, if set by the caller through the - `X-Request-ID` header. -- `realmId`: The unique identifier of the realm. Always set. -- `traceId`: The unique identifier of the trace. Present if tracing is enabled and the message is - originating from a traced context. -- `parentId`: The unique identifier of the parent span. Present if tracing is enabled and the - message is originating from a traced context. -- `spanId`: The unique identifier of the span. Present if tracing is enabled and the message is - originating from a traced context. -- `sampled`: Whether the trace has been sampled. Present if tracing is enabled and the message is - originating from a traced context. - -Other MDC keys can be added by setting the `polaris.log.mdc.*` property. Each property is a -key-value pair, where the key is the MDC key name and the value is the MDC key value. For example, -to add the MDC keys `environment=prod` and `region=us-west-2` to all log messages, set the following -properties: - -```properties -polaris.log.mdc.environment=prod -polaris.log.mdc.region=us-west-2 -``` - -MDC context is propagated across threads, including in `TaskExecutor` threads. - -## Compute Client Audit Reporting - -Polaris supports end-to-end audit correlation between catalog operations, credential vending, and -compute engine metrics reports. This enables organizations to trace data access from the initial -catalog request through to actual S3/GCS/Azure storage access. 
- -### Metrics Reporting Endpoint - -Compute engines can report scan and commit metrics to Polaris using the standard Iceberg REST -Catalog metrics endpoint: - -``` -POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics -``` - -**Request Body**: A `ReportMetricsRequest` containing either a `ScanReport` or `CommitReport`: - -```json -{ - "report-type": "scan-report", - "table-name": "my_table", - "snapshot-id": 123456789, - "schema-id": 0, - "projected-field-ids": [1, 2, 3], - "projected-field-names": ["id", "name", "value"], - "filter": {"type": "always-true"}, - "metrics": { - "result-data-files": {"unit": "count", "value": 10}, - "total-file-size-bytes": {"unit": "bytes", "value": 1048576} - }, - "metadata": { - "trace-id": "abcdef1234567890abcdef1234567890", - "client-app": "spark-3.5" - } -} -``` - -**Response**: `204 No Content` on success. - -The `metadata` map in the report can contain a `trace-id` for correlation with other audit events. -This trace ID is extracted and stored in the event's `additional_properties` with a `report.` prefix. - -### Trace Correlation - -When OpenTelemetry is enabled, Polaris captures the `trace_id` at multiple points: - -1. **Catalog Operations**: Events like `loadTable`, `createTable` include the OpenTelemetry trace - context in their metadata. -2. **Credential Vending**: When AWS STS session tags are enabled, the `trace_id` is included as a - session tag (`polaris:trace_id`) in the vended credentials. This appears in AWS CloudTrail logs. -3. **Metrics Reports**: When compute engines report scan/commit metrics back to Polaris, the - `reportMetrics` events capture both the OpenTelemetry trace context from HTTP headers and any - `trace-id` passed in the report's `metadata` map. 
- -### Enabling Session Tags for AWS - -To enable session tags (including trace_id) in AWS STS credentials, set the following feature flag: - -```properties -polaris.features."INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL"=true -``` - -This adds the following tags to all STS AssumeRole requests: - -- `polaris:catalog` - The catalog name -- `polaris:namespace` - The namespace being accessed -- `polaris:table` - The table name -- `polaris:principal` - The authenticated principal -- `polaris:roles` - The activated principal roles -- `polaris:trace_id` - The OpenTelemetry trace ID - -These tags appear in AWS CloudTrail logs, enabling correlation with Polaris audit events. - -**Note**: Enabling session tags requires the IAM role trust policy to allow the `sts:TagSession` -action. This feature may also reduce credential caching effectiveness since credentials become -specific to each table/namespace/role combination. - -### Compute Engine Integration - -For end-to-end trace correlation, compute engines should propagate the W3C Trace Context headers -when making requests to Polaris. The standard headers are: - -- `traceparent`: Contains the trace ID, parent span ID, and trace flags -- `tracestate`: Optional vendor-specific trace information - -#### Apache Spark - -Spark can propagate trace context using the OpenTelemetry Java agent. 
Add the agent to your Spark -submit command: - -```bash -spark-submit \ - --conf "spark.driver.extraJavaOptions=-javaagent:/path/to/opentelemetry-javaagent.jar" \ - --conf "spark.executor.extraJavaOptions=-javaagent:/path/to/opentelemetry-javaagent.jar" \ - -Dotel.service.name=spark-app \ - -Dotel.exporter.otlp.endpoint=http://collector:4317 \ - your-application.jar -``` - -Alternatively, configure the agent via environment variables: - -```bash -export OTEL_SERVICE_NAME=spark-app -export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector:4317 -export JAVA_TOOL_OPTIONS="-javaagent:/path/to/opentelemetry-javaagent.jar" -``` - -#### Trino - -Trino supports OpenTelemetry tracing with the following configuration in `config.properties`: - -```properties -tracing.enabled=true -tracing.exporter.endpoint=http://collector:4317 -``` - -#### Flink - -Flink can be configured with OpenTelemetry using the Java agent: - -```bash --javaagent:/path/to/opentelemetry-javaagent.jar \ --Dotel.service.name=flink-job \ --Dotel.exporter.otlp.endpoint=http://collector:4317 -``` - -### Correlating Audit Events - -With trace correlation enabled, you can join events across systems: - -1. **Polaris Events**: Query the events table for operations with a specific `trace_id` -2. **CloudTrail Logs**: Filter by the `polaris:trace_id` session tag -3. 
**Compute Engine Logs**: Search for the same trace ID in engine logs - -Example queries to find all Polaris events for a trace: - -**PostgreSQL** (using JSON operators): -```sql -SELECT * FROM polaris_schema.events -WHERE additional_properties->>'otel.trace_id' = '<trace-id>' - OR additional_properties->>'report.trace-id' = '<trace-id>' -ORDER BY timestamp_ms; -``` - -**H2/Generic SQL** (using LIKE pattern matching): -```sql -SELECT * FROM polaris_schema.events -WHERE additional_properties LIKE '%<trace-id>%' -ORDER BY timestamp_ms; -``` - -### Metrics Event Data - -The `AfterReportMetricsEvent` captures the following data in `additional_properties`: - -**For ScanReports:** -- `report_type`: "scan" -- `snapshot_id`: The snapshot ID being scanned -- `schema_id`: The schema ID -- `result_data_files`: Number of data files in the scan result -- `result_delete_files`: Number of delete files in the scan result -- `total_file_size_bytes`: Total size of files scanned -- `scanned_data_manifests`: Number of data manifests scanned -- `skipped_data_manifests`: Number of data manifests skipped -- `report.*`: Any metadata from the report's metadata map (e.g., `report.trace-id`) - -**For CommitReports:** -- `report_type`: "commit" -- `snapshot_id`: The new snapshot ID -- `sequence_number`: The sequence number -- `operation`: The operation type (e.g., "append", "overwrite") -- `added_data_files`: Number of data files added -- `removed_data_files`: Number of data files removed -- `added_records`: Number of records added -- `removed_records`: Number of records removed -- `added_file_size_bytes`: Total size of files added -- `removed_file_size_bytes`: Total size of files removed -- `report.*`: Any metadata from the report's metadata map (e.g., `report.trace-id`) - -## Links - -Visit [Using Polaris with telemetry tools]({{% relref "getting-started/using-polaris/telemetry-tools" %}}) to see sample Polaris config with Prometheus and Jaeger. 
From f57ad416d2f6410f2dd9287bc686b6c5ed383e2c Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 21 Jan 2026 10:27:22 -0800 Subject: [PATCH 32/67] Add schema version checks to metrics persistence methods Address reviewer feedback: check schema version before accessing metrics tables. The metrics tables (scan_metrics_report, commit_metrics_report) were introduced in schema version 4. On deployments with older schemas (v1-v3), these tables don't exist and operations would fail with SQL errors. This change adds graceful degradation: - Write methods: log at DEBUG level and return silently (no-op) - Query methods: return empty list - Delete methods: return 0 Added: - METRICS_TABLES_MIN_SCHEMA_VERSION constant (4) - supportsMetricsPersistence() public method for capability check - Schema version checks to all 9 metrics methods This follows the existing pattern used in hasOverlappingSiblings() for schema version 2 features. --- .../jdbc/JdbcBasePersistenceImpl.java | 81 +++++++++++++++++-- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 35e7d9c30e..9917e3dda2 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -96,6 +96,9 @@ public class JdbcBasePersistenceImpl implements BasePersistence, IntegrationPers // The max number of components a location can have before the optimized sibling check is not used private static final int MAX_LOCATION_COMPONENTS = 40; + // Minimum schema version that includes metrics tables (scan_metrics_report, commit_metrics_report) + private static final int 
METRICS_TABLES_MIN_SCHEMA_VERSION = 4; + public JdbcBasePersistenceImpl( PolarisDiagnostics diagnostics, DatasourceOperations databaseOperations, @@ -111,6 +114,18 @@ public JdbcBasePersistenceImpl( this.schemaVersion = schemaVersion; } + /** + * Returns true if the current schema version supports metrics persistence tables. + * + *

    Metrics tables (scan_metrics_report, commit_metrics_report) were introduced in schema + * version 4. On older schemas, metrics persistence operations will be no-ops. + * + * @return true if schema version >= 4, false otherwise + */ + public boolean supportsMetricsPersistence() { + return this.schemaVersion >= METRICS_TABLES_MIN_SCHEMA_VERSION; + } + @Override public long generateNewId(@Nonnull PolarisCallContext callCtx) { return IdGenerator.getIdGenerator().nextId(); @@ -322,9 +337,17 @@ public void writeEvents(@Nonnull List events) { /** * Writes a scan metrics report to the database as a first-class entity. * + *

    This method requires schema version 4 or higher. On older schemas, this method is a no-op. + * * @param report the scan metrics report to persist */ public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { + if (!supportsMetricsPersistence()) { + LOGGER.debug( + "Schema version {} does not support metrics tables. Skipping scan metrics write.", + schemaVersion); + return; + } try { PreparedQuery pq = QueryGenerator.generateInsertQueryWithoutRealmId( @@ -344,9 +367,17 @@ public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { /** * Writes a commit metrics report to the database as a first-class entity. * + *

    This method requires schema version 4 or higher. On older schemas, this method is a no-op. + * * @param report the commit metrics report to persist */ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { + if (!supportsMetricsPersistence()) { + LOGGER.debug( + "Schema version {} does not support metrics tables. Skipping commit metrics write.", + schemaVersion); + return; + } try { PreparedQuery pq = QueryGenerator.generateInsertQueryWithoutRealmId( @@ -366,13 +397,16 @@ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { /** * Retrieves scan metrics reports for a specific table within a time range. * + *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. + * * @param catalogName the catalog name * @param namespace the namespace * @param tableName the table name * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound * @param limit maximum number of results to return - * @return list of scan metrics reports matching the criteria + * @return list of scan metrics reports matching the criteria, or empty list if schema version < + * 4 */ @Nonnull public List queryScanMetricsReports( @@ -382,6 +416,9 @@ public List queryScanMetricsReports( @Nullable Long startTimeMs, @Nullable Long endTimeMs, int limit) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } try { StringBuilder whereClause = new StringBuilder(); whereClause.append("realm_id = ? AND catalog_name = ? AND namespace = ? AND table_name = ?"); @@ -417,13 +454,16 @@ public List queryScanMetricsReports( /** * Retrieves commit metrics reports for a specific table within a time range. * + *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. + * * @param catalogName the catalog name * @param namespace the namespace * @param tableName the table name * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound * @param limit maximum number of results to return - * @return list of commit metrics reports matching the criteria + * @return list of commit metrics reports matching the criteria, or empty list if schema version + * < 4 */ @Nonnull public List queryCommitMetricsReports( @@ -433,6 +473,9 @@ public List queryCommitMetricsReports( @Nullable Long startTimeMs, @Nullable Long endTimeMs, int limit) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } try { List values = new ArrayList<>(List.of(realmId, catalogName, namespace, tableName)); @@ -469,11 +512,17 @@ public List queryCommitMetricsReports( /** * Retrieves scan metrics reports by OpenTelemetry trace ID. * + *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. + * * @param traceId the OpenTelemetry trace ID - * @return list of scan metrics reports with the given trace ID + * @return list of scan metrics reports with the given trace ID, or empty list if schema version + * < 4 */ @Nonnull public List queryScanMetricsReportsByTraceId(@Nonnull String traceId) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } try { String sql = "SELECT * FROM " @@ -495,12 +544,18 @@ public List queryScanMetricsReportsByTraceId(@Nonnull St /** * Retrieves commit metrics reports by OpenTelemetry trace ID. * + *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. + * * @param traceId the OpenTelemetry trace ID - * @return list of commit metrics reports with the given trace ID + * @return list of commit metrics reports with the given trace ID, or empty list if schema version + * < 4 */ @Nonnull public List queryCommitMetricsReportsByTraceId( @Nonnull String traceId) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } try { String sql = "SELECT * FROM " @@ -522,11 +577,16 @@ public List queryCommitMetricsReportsByTraceId( /** * Deletes scan metrics reports older than the specified timestamp. * + *

    This method requires schema version 4 or higher. On older schemas, returns 0. + * * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be * deleted - * @return the number of reports deleted + * @return the number of reports deleted, or 0 if schema version < 4 */ public int deleteScanMetricsReportsOlderThan(long olderThanMs) { + if (!supportsMetricsPersistence()) { + return 0; + } try { String sql = "DELETE FROM " @@ -544,11 +604,16 @@ public int deleteScanMetricsReportsOlderThan(long olderThanMs) { /** * Deletes commit metrics reports older than the specified timestamp. * + *

    This method requires schema version 4 or higher. On older schemas, returns 0. + * * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be * deleted - * @return the number of reports deleted + * @return the number of reports deleted, or 0 if schema version < 4 */ public int deleteCommitMetricsReportsOlderThan(long olderThanMs) { + if (!supportsMetricsPersistence()) { + return 0; + } try { String sql = "DELETE FROM " @@ -567,9 +632,11 @@ public int deleteCommitMetricsReportsOlderThan(long olderThanMs) { /** * Deletes all metrics reports (both scan and commit) older than the specified timestamp. * + *

    This method requires schema version 4 or higher. On older schemas, returns 0. + * * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be * deleted - * @return the total number of reports deleted (scan + commit) + * @return the total number of reports deleted (scan + commit), or 0 if schema version < 4 */ public int deleteAllMetricsReportsOlderThan(long olderThanMs) { int scanDeleted = deleteScanMetricsReportsOlderThan(olderThanMs); From df0d46f6a036972f11c0445de56f7db86d797927 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 21 Jan 2026 11:04:22 -0800 Subject: [PATCH 33/67] Apply spotless formatting --- .../relational/jdbc/JdbcBasePersistenceImpl.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 9917e3dda2..62ad93d0b9 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -96,7 +96,8 @@ public class JdbcBasePersistenceImpl implements BasePersistence, IntegrationPers // The max number of components a location can have before the optimized sibling check is not used private static final int MAX_LOCATION_COMPONENTS = 40; - // Minimum schema version that includes metrics tables (scan_metrics_report, commit_metrics_report) + // Minimum schema version that includes metrics tables (scan_metrics_report, + // commit_metrics_report) private static final int METRICS_TABLES_MIN_SCHEMA_VERSION = 4; public JdbcBasePersistenceImpl( @@ -405,8 +406,8 @@ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { * @param startTimeMs 
start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound * @param limit maximum number of results to return - * @return list of scan metrics reports matching the criteria, or empty list if schema version < - * 4 + * @return list of scan metrics reports matching the criteria, or empty list if schema version + * < 4 */ @Nonnull public List queryScanMetricsReports( From 7715acfa1eb1f9b24b2448170dc7fb537a9cad11 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 23 Jan 2026 08:55:33 -0800 Subject: [PATCH 34/67] Add tests for schema version < 4 graceful degradation Tests verify that metrics methods handle older schemas gracefully: - supportsMetricsPersistence() returns false for v1-v3 - Write methods are silent no-ops - Query methods return empty lists - Delete methods return 0 --- .../jdbc/MetricsReportPersistenceTest.java | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 252a19b920..25b815bd72 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -489,6 +489,180 @@ void testDeleteOldCommitMetricsReports() { assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } + // ==================== Schema Version < 4 Tests ==================== + // These tests verify graceful degradation when metrics tables don't exist + + @Test + void testSupportsMetricsPersistence_SchemaV4() { + assertThat(persistence.supportsMetricsPersistence()).isTrue(); + } + + @Test + void 
testSupportsMetricsPersistence_SchemaV3() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + assertThat(v3Persistence.supportsMetricsPersistence()).isFalse(); + } + + @Test + void testSupportsMetricsPersistence_SchemaV1() { + JdbcBasePersistenceImpl v1Persistence = createPersistenceWithSchemaVersion(1); + assertThat(v1Persistence.supportsMetricsPersistence()).isFalse(); + } + + @Test + void testWriteScanMetricsReport_OlderSchema_IsNoOp() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .resultDataFiles(1L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(100L) + .totalDataManifests(1L) + .totalDeleteManifests(0L) + .scannedDataManifests(1L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(10L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + // Should not throw - silently ignored on older schemas + v3Persistence.writeScanMetricsReport(report); + } + + @Test + void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + ModelCommitMetricsReport report = + ImmutableModelCommitMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .operation("append") + .addedDataFiles(1L) + .removedDataFiles(0L) + .totalDataFiles(1L) + 
.addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(0L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(100L) + .removedRecords(0L) + .totalRecords(100L) + .addedFileSizeBytes(1000L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(1000L) + .totalDurationMs(50L) + .attempts(1) + .build(); + + // Should not throw - silently ignored on older schemas + v3Persistence.writeCommitMetricsReport(report); + } + + @Test + void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + var results = + v3Persistence.queryScanMetricsReports("catalog", "namespace", "table", null, null, 10); + + assertThat(results).isEmpty(); + } + + @Test + void testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + var results = + v3Persistence.queryCommitMetricsReports("catalog", "namespace", "table", null, null, 10); + + assertThat(results).isEmpty(); + } + + @Test + void testQueryScanMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + var results = v3Persistence.queryScanMetricsReportsByTraceId("trace-123"); + + assertThat(results).isEmpty(); + } + + @Test + void testQueryCommitMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + var results = v3Persistence.queryCommitMetricsReportsByTraceId("trace-123"); + + assertThat(results).isEmpty(); + } + + @Test + void testDeleteScanMetricsReportsOlderThan_OlderSchema_ReturnsZero() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + int deleted = v3Persistence.deleteScanMetricsReportsOlderThan(System.currentTimeMillis()); + + 
assertThat(deleted).isEqualTo(0); + } + + @Test + void testDeleteCommitMetricsReportsOlderThan_OlderSchema_ReturnsZero() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + int deleted = v3Persistence.deleteCommitMetricsReportsOlderThan(System.currentTimeMillis()); + + assertThat(deleted).isEqualTo(0); + } + + @Test + void testDeleteAllMetricsReportsOlderThan_OlderSchema_ReturnsZero() { + JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + + int deleted = v3Persistence.deleteAllMetricsReportsOlderThan(System.currentTimeMillis()); + + assertThat(deleted).isEqualTo(0); + } + + /** + * Creates a JdbcBasePersistenceImpl with the specified schema version. This uses the same + * datasource but with a different reported schema version to test graceful degradation. + */ + private JdbcBasePersistenceImpl createPersistenceWithSchemaVersion(int schemaVersion) { + PolarisDiagnostics diagServices = new PolarisDefaultDiagServiceImpl(); + return new JdbcBasePersistenceImpl( + diagServices, + datasourceOperations, + RANDOM_SECRETS, + Mockito.mock(), + "TEST_REALM", + schemaVersion); + } + private static class TestJdbcConfiguration implements RelationalJdbcConfiguration { @Override public Optional maxRetries() { From e857bbef63930fcb7ed52524a664c15b368589c1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 23 Jan 2026 09:28:04 -0800 Subject: [PATCH 35/67] Remove query indexes, keep only retention cleanup index Per reviewer feedback, removed indexes for query patterns (table, trace_id, principal, operation, snapshot) since no analytics APIs currently expose these queries. Kept only the composite (realm_id, timestamp_ms) index needed for retention cleanup operations. Additional indexes can be added when analytics APIs are introduced. Also restored telemetry.md to match upstream (no changes from this PR). 
--- site/content/in-dev/unreleased/telemetry.md | 196 ++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 site/content/in-dev/unreleased/telemetry.md diff --git a/site/content/in-dev/unreleased/telemetry.md b/site/content/in-dev/unreleased/telemetry.md new file mode 100644 index 0000000000..fc1a1336ce --- /dev/null +++ b/site/content/in-dev/unreleased/telemetry.md @@ -0,0 +1,196 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +title: Telemetry +type: docs +weight: 450 +--- + +## Metrics + +Metrics are published using [Micrometer]; they are available from Polaris's management interface +(port 8182 by default) under the path `/q/metrics`. For example, if the server is running on +localhost, the metrics can be accessed via http://localhost:8182/q/metrics. + +[Micrometer]: https://quarkus.io/guides/telemetry-micrometer + +Metrics can be scraped by Prometheus or any compatible metrics scraping server. See: +[Prometheus](https://prometheus.io) for more information. + +Additional tags can be added to the metrics by setting the `polaris.metrics.tags.*` property. Each +tag is a key-value pair, where the key is the tag name and the value is the tag value. 
For example, +to add a tag `environment=prod` to all metrics, set `polaris.metrics.tags.environment=prod`. Many +tags can be added, such as below: + +```properties +polaris.metrics.tags.service=polaris +polaris.metrics.tags.environment=prod +polaris.metrics.tags.region=us-west-2 +``` + +Note that by default Polaris adds one tag: `application=Polaris`. You can override this tag by +setting the `polaris.metrics.tags.application=` property. + +### Realm ID Tag + +Polaris can add the realm ID as a tag to all API and HTTP request metrics. This is disabled by +default to prevent high cardinality issues, but can be enabled by setting the following properties: + +```properties +polaris.metrics.realm-id-tag.enable-in-api-metrics=true +polaris.metrics.realm-id-tag.enable-in-http-metrics=true +``` + +You should be particularly careful when enabling the realm ID tag in HTTP request metrics, as these +metrics typically have a much higher cardinality than API request metrics. + +In order to prevent the number of tags from growing indefinitely and causing performance issues or +crashing the server, the number of unique realm IDs in HTTP request metrics is limited to 100 by +default. If the number of unique realm IDs exceeds this value, a warning will be logged and no more +HTTP request metrics will be recorded. This threshold can be changed by setting the +`polaris.metrics.realm-id-tag.http-metrics-max-cardinality` property. + +## Traces + +Traces are published using [OpenTelemetry]. + +[OpenTelemetry]: https://quarkus.io/guides/opentelemetry-tracing + +By default OpenTelemetry is disabled in Polaris, because there is no reasonable default +for the collector endpoint for all cases. + +To enable OpenTelemetry and publish traces for Polaris set `quarkus.otel.sdk.disabled=false` +and configure a valid collector endpoint URL with `http://` or `https://` as the server property +`quarkus.otel.exporter.otlp.traces.endpoint`. 
+ +_If these properties are not set, the server will not publish traces._ + +The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port +(by default 4317), e.g. "http://otlp-collector:4317". + +By default, Polaris adds a few attributes to the [OpenTelemetry Resource] to identify the server, +and notably: + +- `service.name`: set to `Apache Polaris Server (incubating)`; +- `service.version`: set to the Polaris version. + +[OpenTelemetry Resource]: https://opentelemetry.io/docs/languages/js/resources/ + +You can override the default resource attributes or add additional ones by setting the +`quarkus.otel.resource.attributes` property. + +This property expects a comma-separated list of key-value pairs, where the key is the attribute name +and the value is the attribute value. For example, to change the service name to `Polaris` and add +an attribute `deployment.environment=dev`, set the following property: + +```properties +quarkus.otel.resource.attributes=service.name=Polaris,deployment.environment=dev +``` + +The alternative syntax below can also be used: + +```properties +quarkus.otel.resource.attributes[0]=service.name=Polaris +quarkus.otel.resource.attributes[1]=deployment.environment=dev +``` + +Finally, two additional span attributes are added to all request parent spans: + +- `polaris.request.id`: The unique identifier of the request, if set by the caller through the + `X-Request-ID` header. +- `polaris.realm`: The unique identifier of the realm. Always set (unless the request failed because + of a realm resolution error). + +### Troubleshooting Traces + +If the server is unable to publish traces, check first for a log warning message like the following: + +``` +SEVERE [io.ope.exp.int.grp.OkHttpGrpcExporter] (OkHttp http://localhost:4317/...) Failed to export spans. +The request could not be executed. 
Full error message: Failed to connect to localhost/0:0:0:0:0:0:0:1:4317 +``` + +This means that the server is unable to connect to the collector. Check that the collector is +running and that the URL is correct. + +## Logging + +Polaris relies on [Quarkus](https://quarkus.io/guides/logging) for logging. + +By default, logs are written to the console and to a file located in the `./logs` directory. The log +file is rotated daily and compressed. The maximum size of the log file is 10MB, and the maximum +number of backup files is 14. + +JSON logging can be enabled by setting the `quarkus.log.console.json.enabled` and `quarkus.log.file.json.enabled` +properties to `true`. By default, JSON logging is disabled. + +The log level can be set for the entire application or for specific packages. The default log level +is `INFO`. To set the log level for the entire application, use the `quarkus.log.level` property. + +To set the log level for a specific package, use the `quarkus.log.category."package-name".level`, +where `package-name` is the name of the package. For example, the package `io.smallrye.config` has a +useful logger to help debugging configuration issues; but it needs to be set to the `DEBUG` level. +This can be done by setting the following property: + +```properties +quarkus.log.category."io.smallrye.config".level=DEBUG +``` + +The log message format for both console and file output is highly configurable. The default format +is: + +``` +%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n +``` + +Refer to the [Logging format](https://quarkus.io/guides/logging#logging-format) guide for more +information on placeholders and how to customize the log message format. + +### MDC Logging + +Polaris uses Mapped Diagnostic Context (MDC) to enrich log messages with additional context. 
The +following MDC keys are available: + +- `requestId`: The unique identifier of the request, if set by the caller through the + `X-Request-ID` header. +- `realmId`: The unique identifier of the realm. Always set. +- `traceId`: The unique identifier of the trace. Present if tracing is enabled and the message is + originating from a traced context. +- `parentId`: The unique identifier of the parent span. Present if tracing is enabled and the + message is originating from a traced context. +- `spanId`: The unique identifier of the span. Present if tracing is enabled and the message is + originating from a traced context. +- `sampled`: Whether the trace has been sampled. Present if tracing is enabled and the message is + originating from a traced context. + +Other MDC keys can be added by setting the `polaris.log.mdc.*` property. Each property is a +key-value pair, where the key is the MDC key name and the value is the MDC key value. For example, +to add the MDC keys `environment=prod` and `region=us-west-2` to all log messages, set the following +properties: + +```properties +polaris.log.mdc.environment=prod +polaris.log.mdc.region=us-west-2 +``` + +MDC context is propagated across threads, including in `TaskExecutor` threads. + +## Links + +Visit [Using Polaris with telemetry tools]({{% relref "getting-started/using-polaris/telemetry-tools" %}}) to see sample Polaris config with Prometheus and Jaeger. 
From efe31c236dec36ec854eb775f4847deedb5661ce Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 23 Jan 2026 10:28:20 -0800 Subject: [PATCH 36/67] Add realm_id to PRIMARY KEY and roles junction tables Per reviewer feedback: - Changed PRIMARY KEY from (report_id) to (realm_id, report_id) for both scan_metrics_report and commit_metrics_report tables, consistent with other Polaris tables - Added scan_metrics_report_roles and commit_metrics_report_roles junction tables to store activated principal roles (PolarisPrincipal.getRoles()) - Added roles field to ModelScanMetricsReport and ModelCommitMetricsReport - Updated persistence methods to write roles to junction tables - Added tests for writing reports with roles --- .../jdbc/JdbcBasePersistenceImpl.java | 20 +++++ .../jdbc/models/ModelCommitMetricsReport.java | 11 +++ .../jdbc/models/ModelScanMetricsReport.java | 11 +++ .../jdbc/MetricsReportPersistenceTest.java | 86 +++++++++++++++++++ 4 files changed, 128 insertions(+) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 62ad93d0b9..9d1ab369b6 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -359,6 +359,16 @@ public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { if (updated == 0) { throw new SQLException("Scan metrics report was not inserted."); } + + // Insert roles into junction table + for (String role : report.getRoles()) { + PreparedQuery roleQuery = + QueryGenerator.generateInsertQueryWithoutRealmId( + List.of("realm_id", "report_id", "role_name"), + "SCAN_METRICS_REPORT_ROLES", + List.of(report.getRealmId(), 
report.getReportId(), role)); + datasourceOperations.executeUpdate(roleQuery); + } } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); @@ -389,6 +399,16 @@ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { if (updated == 0) { throw new SQLException("Commit metrics report was not inserted."); } + + // Insert roles into junction table + for (String role : report.getRoles()) { + PreparedQuery roleQuery = + QueryGenerator.generateInsertQueryWithoutRealmId( + List.of("realm_id", "report_id", "role_name"), + "COMMIT_METRICS_REPORT_ROLES", + List.of(report.getRealmId(), report.getReportId(), role)); + datasourceOperations.executeUpdate(roleQuery); + } } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index a2c7b420c6..57026cea0c 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -24,8 +24,10 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.polaris.immutables.PolarisImmutable; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; +import org.immutables.value.Value; /** Model class for commit_metrics_report table - stores commit metrics as first-class entities. 
*/ @PolarisImmutable @@ -181,6 +183,15 @@ public interface ModelCommitMetricsReport extends Converter getRoles() { + return Set.of(); + } + @Override default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { return ImmutableModelCommitMetricsReport.builder() diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index 8a44d44390..3d28905f14 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -24,8 +24,10 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.polaris.immutables.PolarisImmutable; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; +import org.immutables.value.Value; /** Model class for scan_metrics_report table - stores scan metrics as first-class entities. 
*/ @PolarisImmutable @@ -185,6 +187,15 @@ public interface ModelScanMetricsReport extends Converter getRoles() { + return Set.of(); + } + @Override default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { return ImmutableModelScanMetricsReport.builder() diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 25b815bd72..d6d621b2b8 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.sql.SQLException; import java.util.Optional; +import java.util.Set; import java.util.UUID; import javax.sql.DataSource; import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; @@ -648,6 +649,91 @@ void testDeleteAllMetricsReportsOlderThan_OlderSchema_ReturnsZero() { assertThat(deleted).isEqualTo(0); } + @Test + void testWriteScanMetricsReportWithRoles() { + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .schemaId(1) + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + 
.indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(10240L) + .principalName("test-user") + .requestId("req-123") + .otelTraceId("trace-abc") + .otelSpanId("span-xyz") + .reportTraceId("report-trace-roles") + .roles(Set.of("admin", "data_engineer", "analyst")) + .build(); + + // Should not throw - roles are written to junction table + persistence.writeScanMetricsReport(report); + } + + @Test + void testWriteCommitMetricsReportWithRoles() { + ModelCommitMetricsReport report = + ImmutableModelCommitMetricsReport.builder() + .reportId(UUID.randomUUID().toString()) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .sequenceNumber(1L) + .operation("append") + .addedDataFiles(5L) + .removedDataFiles(0L) + .totalDataFiles(100L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(2L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(0L) + .totalRecords(50000L) + .addedFileSizeBytes(102400L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(5120000L) + .totalDurationMs(250L) + .attempts(1) + .principalName("test-user") + .requestId("req-456") + .otelTraceId("trace-def") + .otelSpanId("span-uvw") + .reportTraceId("report-trace-roles") + .roles(Set.of("admin", "data_engineer")) + .build(); + + // Should not throw - roles are written to junction table + persistence.writeCommitMetricsReport(report); + } + /** * Creates a JdbcBasePersistenceImpl with the specified schema version. This uses the same * datasource but with a different reported schema version to test graceful degradation. 
From 1de8bd41cc1967fcf045793af695e87460d5b916 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 23 Jan 2026 10:51:07 -0800 Subject: [PATCH 37/67] Fix code review issues: transaction support, role validation, and role loading - Add transaction support for report + roles writes using runWithinTransaction() - Validate/filter role values before insert (skip null/blank roles) - Implement role loading on reads via loadScanMetricsReportRoles() and loadCommitMetricsReportRoles() helper methods - Add comprehensive tests for roles: - testScanMetricsReportRolesAreReadBack() - testCommitMetricsReportRolesAreReadBack() - testScanMetricsReportWithEmptyRoles() - testScanMetricsReportRolesViaTimeRangeQuery() All tests pass. --- .../jdbc/JdbcBasePersistenceImpl.java | 209 +++++++++++++---- .../jdbc/MetricsReportPersistenceTest.java | 216 ++++++++++++++++++ 2 files changed, 385 insertions(+), 40 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 9d1ab369b6..b39c62c321 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -28,11 +28,13 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; @@ -68,7 +70,10 @@ import org.apache.polaris.core.storage.PolarisStorageIntegration; import 
org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.apache.polaris.core.storage.StorageLocation; +import org.apache.polaris.persistence.relational.jdbc.models.Converter; import org.apache.polaris.persistence.relational.jdbc.models.EntityNameLookupRecordConverter; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity; @@ -350,25 +355,32 @@ public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { return; } try { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelScanMetricsReport.ALL_COLUMNS, - ModelScanMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); - int updated = datasourceOperations.executeUpdate(pq); - if (updated == 0) { - throw new SQLException("Scan metrics report was not inserted."); - } + datasourceOperations.runWithinTransaction( + connection -> { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelScanMetricsReport.ALL_COLUMNS, + ModelScanMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream() + .toList()); + int updated = datasourceOperations.execute(connection, pq); + if (updated == 0) { + throw new SQLException("Scan metrics report was not inserted."); + } - // Insert roles into junction table - for (String role : report.getRoles()) { - PreparedQuery roleQuery = - QueryGenerator.generateInsertQueryWithoutRealmId( - List.of("realm_id", "report_id", "role_name"), - "SCAN_METRICS_REPORT_ROLES", - List.of(report.getRealmId(), report.getReportId(), role)); - 
datasourceOperations.executeUpdate(roleQuery); - } + // Insert roles into junction table (filter out null/blank values) + for (String role : report.getRoles()) { + if (role != null && !role.isBlank()) { + PreparedQuery roleQuery = + QueryGenerator.generateInsertQueryWithoutRealmId( + List.of("realm_id", "report_id", "role_name"), + "SCAN_METRICS_REPORT_ROLES", + List.of(report.getRealmId(), report.getReportId(), role)); + datasourceOperations.execute(connection, roleQuery); + } + } + return true; + }); } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); @@ -390,31 +402,136 @@ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { return; } try { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelCommitMetricsReport.ALL_COLUMNS, - ModelCommitMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); - int updated = datasourceOperations.executeUpdate(pq); - if (updated == 0) { - throw new SQLException("Commit metrics report was not inserted."); - } + datasourceOperations.runWithinTransaction( + connection -> { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelCommitMetricsReport.ALL_COLUMNS, + ModelCommitMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream() + .toList()); + int updated = datasourceOperations.execute(connection, pq); + if (updated == 0) { + throw new SQLException("Commit metrics report was not inserted."); + } - // Insert roles into junction table - for (String role : report.getRoles()) { - PreparedQuery roleQuery = - QueryGenerator.generateInsertQueryWithoutRealmId( - List.of("realm_id", "report_id", "role_name"), - "COMMIT_METRICS_REPORT_ROLES", - List.of(report.getRealmId(), report.getReportId(), role)); - datasourceOperations.executeUpdate(roleQuery); - } + // Insert roles into 
junction table (filter out null/blank values) + for (String role : report.getRoles()) { + if (role != null && !role.isBlank()) { + PreparedQuery roleQuery = + QueryGenerator.generateInsertQueryWithoutRealmId( + List.of("realm_id", "report_id", "role_name"), + "COMMIT_METRICS_REPORT_ROLES", + List.of(report.getRealmId(), report.getReportId(), role)); + datasourceOperations.execute(connection, roleQuery); + } + } + return true; + }); } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); } } + /** Simple converter to extract role_name from ResultSet. */ + private static final RoleNameConverter ROLE_NAME_CONVERTER = new RoleNameConverter(); + + /** Converter class that extracts just the role_name column from a ResultSet. */ + private static class RoleNameConverter implements Converter { + @Override + public String fromResultSet(java.sql.ResultSet rs) throws SQLException { + return rs.getString("role_name"); + } + + @Override + public Map toMap(DatabaseType databaseType) { + throw new UnsupportedOperationException("RoleNameConverter is read-only"); + } + } + + /** + * Loads roles from the scan_metrics_report_roles junction table for the given reports. + * + * @param reports the reports to populate with roles + * @return new list with roles populated + */ + private List loadScanMetricsReportRoles( + List reports) { + if (reports.isEmpty()) { + return reports; + } + try { + // Build a map of reportId -> Set roles + Map> rolesByReportId = new HashMap<>(); + for (ModelScanMetricsReport report : reports) { + String sql = + "SELECT role_name FROM " + + QueryGenerator.getFullyQualifiedTableName("SCAN_METRICS_REPORT_ROLES") + + " WHERE realm_id = ? 
AND report_id = ?"; + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, report.getReportId())); + List roles = datasourceOperations.executeSelect(query, ROLE_NAME_CONVERTER); + if (roles != null && !roles.isEmpty()) { + rolesByReportId.put(report.getReportId(), new HashSet<>(roles)); + } + } + + // Rebuild reports with roles populated + return reports.stream() + .map( + r -> + ImmutableModelScanMetricsReport.builder() + .from(r) + .roles(rolesByReportId.getOrDefault(r.getReportId(), Set.of())) + .build()) + .toList(); + } catch (SQLException e) { + LOGGER.warn("Failed to load roles for scan metrics reports: {}", e.getMessage(), e); + return reports; // Return reports without roles on error + } + } + + /** + * Loads roles from the commit_metrics_report_roles junction table for the given reports. + * + * @param reports the reports to populate with roles + * @return new list with roles populated + */ + private List loadCommitMetricsReportRoles( + List reports) { + if (reports.isEmpty()) { + return reports; + } + try { + // Build a map of reportId -> Set roles + Map> rolesByReportId = new HashMap<>(); + for (ModelCommitMetricsReport report : reports) { + String sql = + "SELECT role_name FROM " + + QueryGenerator.getFullyQualifiedTableName("COMMIT_METRICS_REPORT_ROLES") + + " WHERE realm_id = ? 
AND report_id = ?"; + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, report.getReportId())); + List roles = datasourceOperations.executeSelect(query, ROLE_NAME_CONVERTER); + if (roles != null && !roles.isEmpty()) { + rolesByReportId.put(report.getReportId(), new HashSet<>(roles)); + } + } + + // Rebuild reports with roles populated + return reports.stream() + .map( + r -> + ImmutableModelCommitMetricsReport.builder() + .from(r) + .roles(rolesByReportId.getOrDefault(r.getReportId(), Set.of())) + .build()) + .toList(); + } catch (SQLException e) { + LOGGER.warn("Failed to load roles for commit metrics reports: {}", e.getMessage(), e); + return reports; // Return reports without roles on error + } + } + /** * Retrieves scan metrics reports for a specific table within a time range. * @@ -465,7 +582,10 @@ public List queryScanMetricsReports( PreparedQuery query = new PreparedQuery(sql, values); var results = datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - return results == null ? Collections.emptyList() : results; + if (results == null || results.isEmpty()) { + return Collections.emptyList(); + } + return loadScanMetricsReportRoles(results); } catch (SQLException e) { throw new RuntimeException( String.format("Failed to query scan metrics reports due to %s", e.getMessage()), e); @@ -523,7 +643,10 @@ public List queryCommitMetricsReports( PreparedQuery query = new PreparedQuery(sql, values); var results = datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - return results == null ? 
Collections.emptyList() : results; + if (results == null || results.isEmpty()) { + return Collections.emptyList(); + } + return loadCommitMetricsReportRoles(results); } catch (SQLException e) { throw new RuntimeException( String.format("Failed to query commit metrics reports due to %s", e.getMessage()), e); @@ -553,7 +676,10 @@ public List queryScanMetricsReportsByTraceId(@Nonnull St PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); var results = datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - return results == null ? Collections.emptyList() : results; + if (results == null || results.isEmpty()) { + return Collections.emptyList(); + } + return loadScanMetricsReportRoles(results); } catch (SQLException e) { throw new RuntimeException( String.format( @@ -586,7 +712,10 @@ public List queryCommitMetricsReportsByTraceId( PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); var results = datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - return results == null ? 
Collections.emptyList() : results; + if (results == null || results.isEmpty()) { + return Collections.emptyList(); + } + return loadCommitMetricsReportRoles(results); } catch (SQLException e) { throw new RuntimeException( String.format( diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index d6d621b2b8..74faabe2f6 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -23,6 +23,7 @@ import java.io.InputStream; import java.sql.SQLException; +import java.util.List; import java.util.Optional; import java.util.Set; import java.util.UUID; @@ -734,6 +735,221 @@ void testWriteCommitMetricsReportWithRoles() { persistence.writeCommitMetricsReport(report); } + @Test + void testScanMetricsReportRolesAreReadBack() { + String reportId = UUID.randomUUID().toString(); + String otelTraceId = "otel-trace-roles-read-" + UUID.randomUUID(); + Set expectedRoles = Set.of("admin", "data_engineer", "analyst"); + + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(reportId) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .schemaId(1) + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + 
.equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(10240L) + .principalName("test-user") + .requestId("req-123") + .otelTraceId(otelTraceId) + .otelSpanId("span-xyz") + .reportTraceId("report-trace-123") + .roles(expectedRoles) + .build(); + + persistence.writeScanMetricsReport(report); + + // Query by otel trace ID and verify roles are returned + List results = + persistence.queryScanMetricsReportsByTraceId(otelTraceId); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getReportId()).isEqualTo(reportId); + assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); + } + + @Test + void testCommitMetricsReportRolesAreReadBack() { + String reportId = UUID.randomUUID().toString(); + String otelTraceId = "otel-trace-commit-roles-read-" + UUID.randomUUID(); + Set expectedRoles = Set.of("admin", "data_engineer"); + + ModelCommitMetricsReport report = + ImmutableModelCommitMetricsReport.builder() + .reportId(reportId) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .sequenceNumber(1L) + .operation("append") + .addedDataFiles(5L) + .removedDataFiles(0L) + .totalDataFiles(100L) + .addedDeleteFiles(0L) + .removedDeleteFiles(0L) + .totalDeleteFiles(2L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(0L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(0L) + .totalRecords(50000L) + .addedFileSizeBytes(102400L) + .removedFileSizeBytes(0L) + .totalFileSizeBytes(5120000L) + .totalDurationMs(250L) + .attempts(1) + .principalName("test-user") + .requestId("req-456") + .otelTraceId(otelTraceId) + .otelSpanId("span-uvw") + .reportTraceId("report-trace-456") + .roles(expectedRoles) + .build(); + + persistence.writeCommitMetricsReport(report); + + // Query 
by otel trace ID and verify roles are returned + List results = + persistence.queryCommitMetricsReportsByTraceId(otelTraceId); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getReportId()).isEqualTo(reportId); + assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); + } + + @Test + void testScanMetricsReportWithEmptyRoles() { + String reportId = UUID.randomUUID().toString(); + String otelTraceId = "otel-trace-empty-roles-" + UUID.randomUUID(); + + ModelScanMetricsReport report = + ImmutableModelScanMetricsReport.builder() + .reportId(reportId) + .realmId("TEST_REALM") + .catalogId("test-catalog") + .catalogName("test-catalog") + .namespace("db.schema") + .tableName("test_table") + .timestampMs(System.currentTimeMillis()) + .snapshotId(12345L) + .schemaId(1) + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(10240L) + .principalName("test-user") + .requestId("req-123") + .otelTraceId(otelTraceId) + .otelSpanId("span-xyz") + .reportTraceId("report-trace-empty") + // No roles set - uses default empty set + .build(); + + persistence.writeScanMetricsReport(report); + + // Query by otel trace ID and verify empty roles + List results = + persistence.queryScanMetricsReportsByTraceId(otelTraceId); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getRoles()).isEmpty(); + } + + @Test + void testScanMetricsReportRolesViaTimeRangeQuery() { + String reportId = UUID.randomUUID().toString(); + long timestamp = System.currentTimeMillis(); + Set expectedRoles = Set.of("role1", "role2"); + + ModelScanMetricsReport report = + 
ImmutableModelScanMetricsReport.builder() + .reportId(reportId) + .realmId("TEST_REALM") + .catalogId("test-catalog-roles-query") + .catalogName("test-catalog-roles-query") + .namespace("db.schema") + .tableName("test_table_roles") + .timestampMs(timestamp) + .snapshotId(12345L) + .schemaId(1) + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(10240L) + .principalName("test-user") + .requestId("req-123") + .otelTraceId("trace-abc") + .otelSpanId("span-xyz") + .reportTraceId("report-trace-time-query") + .roles(expectedRoles) + .build(); + + persistence.writeScanMetricsReport(report); + + // Query by time range and verify roles are returned + List<ModelScanMetricsReport> results = + persistence.queryScanMetricsReports( + "test-catalog-roles-query", + "db.schema", + "test_table_roles", + timestamp - 1000, + timestamp + 1000, + 100); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getReportId()).isEqualTo(reportId); + assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); + } + /** * Creates a JdbcBasePersistenceImpl with the specified schema version. This uses the same * datasource but with a different reported schema version to test graceful degradation.
From 53145ea29878b044c329d32739d2aed2991bea7e Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 08:51:00 -0800 Subject: [PATCH 38/67] Integrate MetricsPersistence SPI with JDBC implementation - Add JdbcMetricsPersistence implementing MetricsPersistence interface - Add SpiModelConverter for type conversions between SPI and JDBC models - Add getOrCreateMetricsPersistence() to MetaStoreManagerFactory interface - Override in JdbcMetaStoreManagerFactory to provide JDBC implementation This bridges the gap between the backend-agnostic SPI interface (ScanMetricsRecord, CommitMetricsRecord) and the JDBC-specific model classes (ModelScanMetricsReport, ModelCommitMetricsReport). --- .../jdbc/JdbcMetaStoreManagerFactory.java | 16 + .../jdbc/JdbcMetricsPersistence.java | 127 ++++++++ .../relational/jdbc/SpiModelConverter.java | 286 ++++++++++++++++++ .../persistence/MetaStoreManagerFactory.java | 16 + 4 files changed, 445 insertions(+) create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java index 26f38fc31b..a52e2fdca2 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java @@ -52,6 +52,7 @@ import org.apache.polaris.core.persistence.cache.InMemoryEntityCache; import org.apache.polaris.core.persistence.dao.entity.BaseResult; import 
org.apache.polaris.core.persistence.dao.entity.PrincipalSecretsResult; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -247,6 +248,21 @@ public synchronized EntityCache getOrCreateEntityCache( return entityCacheMap.get(realmContext.getRealmIdentifier()); } + @Override + public synchronized MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext) { + // Ensure the session is initialized for this realm + BasePersistence session = getOrCreateSession(realmContext); + + if (session instanceof JdbcBasePersistenceImpl jdbcPersistence) { + // Return JDBC-specific metrics persistence if schema version supports it + // The JdbcMetricsPersistence will gracefully handle unsupported schemas + return new JdbcMetricsPersistence(jdbcPersistence, realmContext.getRealmIdentifier()); + } + + // Fallback to no-op for non-JDBC sessions (shouldn't happen in JDBC factory) + return MetricsPersistence.NOOP; + } + /** * In this method we check if Service was bootstrapped for a given realm, i.e. that all the * entities were created (root principal, root principal role, etc) If service was not diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java new file mode 100644 index 0000000000..883c4b38b4 --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc; + +import jakarta.annotation.Nonnull; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.apache.polaris.core.persistence.metrics.MetricsQueryCriteria; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; + +/** + * JDBC implementation of {@link MetricsPersistence}. + * + *

    <p>This class bridges the SPI interface with the existing JDBC persistence implementation, + * converting between SPI record types ({@link ScanMetricsRecord}, {@link CommitMetricsRecord}) and + * JDBC model types ({@link ModelScanMetricsReport}, {@link ModelCommitMetricsReport}). + */ +public class JdbcMetricsPersistence implements MetricsPersistence { + + private final JdbcBasePersistenceImpl jdbcPersistence; + private final String realmId; + + /** + * Creates a new JdbcMetricsPersistence instance. + * + * @param jdbcPersistence the underlying JDBC persistence implementation + * @param realmId the realm ID for multi-tenancy + */ + public JdbcMetricsPersistence(JdbcBasePersistenceImpl jdbcPersistence, String realmId) { + this.jdbcPersistence = jdbcPersistence; + this.realmId = realmId; + } + + @Override + public void writeScanReport(@Nonnull ScanMetricsRecord record) { + if (!jdbcPersistence.supportsMetricsPersistence()) { + return; + } + ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, realmId); + jdbcPersistence.writeScanMetricsReport(model); + } + + @Override + public void writeCommitReport(@Nonnull CommitMetricsRecord record) { + if (!jdbcPersistence.supportsMetricsPersistence()) { + return; + } + ModelCommitMetricsReport model = SpiModelConverter.toModelCommitReport(record, realmId); + jdbcPersistence.writeCommitMetricsReport(model); + } + + @Override + @Nonnull + public Page<ScanMetricsRecord> queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + if (!jdbcPersistence.supportsMetricsPersistence()) { + return Page.fromItems(List.of()); + } + + int limit = pageToken.pageSize().orElse(100); + Long startTimeMs = criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); + Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); + + List<ModelScanMetricsReport> models = + jdbcPersistence.queryScanMetricsReports( + criteria.catalogName().orElse(""), + criteria.namespace().orElse(""), + criteria.tableName().orElse(""), +
    startTimeMs, + endTimeMs, + limit); + + List<ScanMetricsRecord> records = + models.stream().map(SpiModelConverter::toScanMetricsRecord).collect(Collectors.toList()); + + return Page.fromItems(records); + } + + @Override + @Nonnull + public Page<CommitMetricsRecord> queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + if (!jdbcPersistence.supportsMetricsPersistence()) { + return Page.fromItems(List.of()); + } + + int limit = pageToken.pageSize().orElse(100); + Long startTimeMs = criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); + Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); + + List<ModelCommitMetricsReport> models = + jdbcPersistence.queryCommitMetricsReports( + criteria.catalogName().orElse(""), + criteria.namespace().orElse(""), + criteria.tableName().orElse(""), + startTimeMs, + endTimeMs, + limit); + + List<CommitMetricsRecord> records = + models.stream().map(SpiModelConverter::toCommitMetricsRecord).collect(Collectors.toList()); + + return Page.fromItems(records); + } +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java new file mode 100644 index 0000000000..2ac7e89fb8 --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; + +/** + * Converter between SPI metrics records and JDBC model classes. + * + *

    <p>This utility class provides methods to convert between the backend-agnostic SPI types ({@link + * ScanMetricsRecord}, {@link CommitMetricsRecord}) and the JDBC-specific model types ({@link + * ModelScanMetricsReport}, {@link ModelCommitMetricsReport}). + * + *

    Key conversions handled: + * + *

      + *
    • catalogId: long (SPI) ↔ String (Model) + *
    • tableIdentifier: TableIdentifier (SPI) ↔ separate namespace/tableName strings (Model) + *
    • timestamp: Instant (SPI) ↔ long milliseconds (Model) + *
    • metadata: Map<String, String> (SPI) ↔ JSON string (Model) + *
    • projectedFieldIds/Names: List (SPI) ↔ comma-separated string (Model) + *
    + */ +public final class SpiModelConverter { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private SpiModelConverter() { + // Utility class + } + + /** + * Converts a ScanMetricsRecord (SPI) to ModelScanMetricsReport (JDBC). + * + * @param record the SPI record + * @param realmId the realm ID for multi-tenancy + * @return the JDBC model + */ + public static ModelScanMetricsReport toModelScanReport(ScanMetricsRecord record, String realmId) { + return ImmutableModelScanMetricsReport.builder() + .reportId(record.reportId()) + .realmId(realmId) + .catalogId(String.valueOf(record.catalogId())) + .catalogName(record.catalogName()) + .namespace(record.tableIdentifier().namespace().toString()) + .tableName(record.tableIdentifier().name()) + .timestampMs(record.timestamp().toEpochMilli()) + .snapshotId(record.snapshotId().orElse(null)) + .schemaId(record.schemaId().orElse(null)) + .filterExpression(record.filterExpression().orElse(null)) + .projectedFieldIds(toCommaSeparated(record.projectedFieldIds())) + .projectedFieldNames(toCommaSeparated(record.projectedFieldNames())) + .resultDataFiles(record.resultDataFiles()) + .resultDeleteFiles(record.resultDeleteFiles()) + .totalFileSizeBytes(record.totalFileSizeBytes()) + .totalDataManifests(record.totalDataManifests()) + .totalDeleteManifests(record.totalDeleteManifests()) + .scannedDataManifests(record.scannedDataManifests()) + .scannedDeleteManifests(record.scannedDeleteManifests()) + .skippedDataManifests(record.skippedDataManifests()) + .skippedDeleteManifests(record.skippedDeleteManifests()) + .skippedDataFiles(record.skippedDataFiles()) + .skippedDeleteFiles(record.skippedDeleteFiles()) + .totalPlanningDurationMs(record.totalPlanningDurationMs()) + .equalityDeleteFiles(record.equalityDeleteFiles()) + .positionalDeleteFiles(record.positionalDeleteFiles()) + .indexedDeleteFiles(record.indexedDeleteFiles()) + .totalDeleteFileSizeBytes(record.totalDeleteFileSizeBytes()) + 
.metadata(toJsonString(record.metadata())) + .build(); + } + + /** + * Converts a CommitMetricsRecord (SPI) to ModelCommitMetricsReport (JDBC). + * + * @param record the SPI record + * @param realmId the realm ID for multi-tenancy + * @return the JDBC model + */ + public static ModelCommitMetricsReport toModelCommitReport( + CommitMetricsRecord record, String realmId) { + return ImmutableModelCommitMetricsReport.builder() + .reportId(record.reportId()) + .realmId(realmId) + .catalogId(String.valueOf(record.catalogId())) + .catalogName(record.catalogName()) + .namespace(record.tableIdentifier().namespace().toString()) + .tableName(record.tableIdentifier().name()) + .timestampMs(record.timestamp().toEpochMilli()) + .snapshotId(record.snapshotId()) + .sequenceNumber(record.sequenceNumber().orElse(null)) + .operation(record.operation()) + .addedDataFiles(record.addedDataFiles()) + .removedDataFiles(record.removedDataFiles()) + .totalDataFiles(record.totalDataFiles()) + .addedDeleteFiles(record.addedDeleteFiles()) + .removedDeleteFiles(record.removedDeleteFiles()) + .totalDeleteFiles(record.totalDeleteFiles()) + .addedEqualityDeleteFiles(record.addedEqualityDeleteFiles()) + .removedEqualityDeleteFiles(record.removedEqualityDeleteFiles()) + .addedPositionalDeleteFiles(record.addedPositionalDeleteFiles()) + .removedPositionalDeleteFiles(record.removedPositionalDeleteFiles()) + .addedRecords(record.addedRecords()) + .removedRecords(record.removedRecords()) + .totalRecords(record.totalRecords()) + .addedFileSizeBytes(record.addedFileSizeBytes()) + .removedFileSizeBytes(record.removedFileSizeBytes()) + .totalFileSizeBytes(record.totalFileSizeBytes()) + .totalDurationMs(record.totalDurationMs().orElse(0L)) + .attempts(record.attempts()) + .metadata(toJsonString(record.metadata())) + .build(); + } + + /** + * Converts a ModelScanMetricsReport (JDBC) to ScanMetricsRecord (SPI). 
+ * + * @param model the JDBC model + * @return the SPI record + */ + public static ScanMetricsRecord toScanMetricsRecord(ModelScanMetricsReport model) { + return ScanMetricsRecord.builder() + .reportId(model.getReportId()) + .catalogId(Long.parseLong(model.getCatalogId())) + .catalogName(model.getCatalogName()) + .tableIdentifier(parseTableIdentifier(model.getNamespace(), model.getTableName())) + .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) + .snapshotId(Optional.ofNullable(model.getSnapshotId())) + .schemaId(Optional.ofNullable(model.getSchemaId())) + .filterExpression(Optional.ofNullable(model.getFilterExpression())) + .projectedFieldIds(parseIntList(model.getProjectedFieldIds())) + .projectedFieldNames(parseStringList(model.getProjectedFieldNames())) + .resultDataFiles(model.getResultDataFiles()) + .resultDeleteFiles(model.getResultDeleteFiles()) + .totalFileSizeBytes(model.getTotalFileSizeBytes()) + .totalDataManifests(model.getTotalDataManifests()) + .totalDeleteManifests(model.getTotalDeleteManifests()) + .scannedDataManifests(model.getScannedDataManifests()) + .scannedDeleteManifests(model.getScannedDeleteManifests()) + .skippedDataManifests(model.getSkippedDataManifests()) + .skippedDeleteManifests(model.getSkippedDeleteManifests()) + .skippedDataFiles(model.getSkippedDataFiles()) + .skippedDeleteFiles(model.getSkippedDeleteFiles()) + .totalPlanningDurationMs(model.getTotalPlanningDurationMs()) + .equalityDeleteFiles(model.getEqualityDeleteFiles()) + .positionalDeleteFiles(model.getPositionalDeleteFiles()) + .indexedDeleteFiles(model.getIndexedDeleteFiles()) + .totalDeleteFileSizeBytes(model.getTotalDeleteFileSizeBytes()) + .metadata(parseMetadataJson(model.getMetadata())) + .build(); + } + + /** + * Converts a ModelCommitMetricsReport (JDBC) to CommitMetricsRecord (SPI). 
+ * + * @param model the JDBC model + * @return the SPI record + */ + public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport model) { + return CommitMetricsRecord.builder() + .reportId(model.getReportId()) + .catalogId(Long.parseLong(model.getCatalogId())) + .catalogName(model.getCatalogName()) + .tableIdentifier(parseTableIdentifier(model.getNamespace(), model.getTableName())) + .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) + .snapshotId(model.getSnapshotId()) + .sequenceNumber(Optional.ofNullable(model.getSequenceNumber())) + .operation(model.getOperation()) + .addedDataFiles(model.getAddedDataFiles()) + .removedDataFiles(model.getRemovedDataFiles()) + .totalDataFiles(model.getTotalDataFiles()) + .addedDeleteFiles(model.getAddedDeleteFiles()) + .removedDeleteFiles(model.getRemovedDeleteFiles()) + .totalDeleteFiles(model.getTotalDeleteFiles()) + .addedEqualityDeleteFiles(model.getAddedEqualityDeleteFiles()) + .removedEqualityDeleteFiles(model.getRemovedEqualityDeleteFiles()) + .addedPositionalDeleteFiles(model.getAddedPositionalDeleteFiles()) + .removedPositionalDeleteFiles(model.getRemovedPositionalDeleteFiles()) + .addedRecords(model.getAddedRecords()) + .removedRecords(model.getRemovedRecords()) + .totalRecords(model.getTotalRecords()) + .addedFileSizeBytes(model.getAddedFileSizeBytes()) + .removedFileSizeBytes(model.getRemovedFileSizeBytes()) + .totalFileSizeBytes(model.getTotalFileSizeBytes()) + .totalDurationMs( + model.getTotalDurationMs() > 0 + ? Optional.of(model.getTotalDurationMs()) + : Optional.empty()) + .attempts(model.getAttempts()) + .metadata(parseMetadataJson(model.getMetadata())) + .build(); + } + + // === Helper Methods === + + private static TableIdentifier parseTableIdentifier(String namespace, String tableName) { + if (namespace == null || namespace.isEmpty()) { + return TableIdentifier.of(Namespace.empty(), tableName); + } + // Namespace.toString() uses "." as separator, so we split by "." 
+ String[] levels = namespace.split("\\."); + return TableIdentifier.of(Namespace.of(levels), tableName); + } + + private static String toCommaSeparated(List<?> list) { + if (list == null || list.isEmpty()) { + return null; + } + return list.stream().map(Object::toString).collect(Collectors.joining(",")); + } + + private static List<Integer> parseIntList(String commaSeparated) { + if (commaSeparated == null || commaSeparated.isEmpty()) { + return Collections.emptyList(); + } + return java.util.Arrays.stream(commaSeparated.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(Integer::parseInt) + .collect(Collectors.toList()); + } + + private static List<String> parseStringList(String commaSeparated) { + if (commaSeparated == null || commaSeparated.isEmpty()) { + return Collections.emptyList(); + } + return java.util.Arrays.stream(commaSeparated.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); + } + + private static String toJsonString(Map<String, String> map) { + if (map == null || map.isEmpty()) { + return "{}"; + } + try { + return OBJECT_MAPPER.writeValueAsString(map); + } catch (JsonProcessingException e) { + return "{}"; + } + } + + private static Map<String, String> parseMetadataJson(String json) { + if (json == null || json.isEmpty() || "{}".equals(json)) { + return Collections.emptyMap(); + } + try { + return OBJECT_MAPPER.readValue(json, new TypeReference<Map<String, String>>() {}); + } catch (JsonProcessingException e) { + return Collections.emptyMap(); + } + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java index 4a32a88591..142eee5233 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java @@ -26,6 +26,7 @@ import org.apache.polaris.core.persistence.cache.EntityCache;
import org.apache.polaris.core.persistence.dao.entity.BaseResult; import org.apache.polaris.core.persistence.dao.entity.PrincipalSecretsResult; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; /** Configuration interface for configuring the {@link PolarisMetaStoreManager}. */ public interface MetaStoreManagerFactory { @@ -36,6 +37,21 @@ public interface MetaStoreManagerFactory { EntityCache getOrCreateEntityCache(RealmContext realmContext, RealmConfig realmConfig); + /** + * Gets or creates a metrics persistence instance for the given realm context. + * + *

    The default implementation returns a no-op implementation that silently ignores write + * operations and returns empty pages for queries. Persistence backends that support metrics + * storage (e.g., JDBC with schema v4+) should override this method to provide a functional + * implementation. + * + * @param realmContext the realm context + * @return a MetricsPersistence implementation for the realm + */ + default MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext) { + return MetricsPersistence.NOOP; + } + Map bootstrapRealms( Iterable realms, RootCredentialsSet rootCredentialsSet); From 2be4d9da9fc1e8ff44a078588f075802797a74d1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 08:53:55 -0800 Subject: [PATCH 39/67] Add unit tests for SpiModelConverter - Test ScanMetricsRecord to ModelScanMetricsReport conversion - Test CommitMetricsRecord to ModelCommitMetricsReport conversion - Test round-trip conversions for both record types - Test edge cases: empty namespace, null optional fields, empty metadata - Add iceberg-api test dependency for TableIdentifier/Namespace classes --- persistence/relational-jdbc/build.gradle.kts | 4 + .../jdbc/SpiModelConverterTest.java | 405 ++++++++++++++++++ 2 files changed, 409 insertions(+) create mode 100644 persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java diff --git a/persistence/relational-jdbc/build.gradle.kts b/persistence/relational-jdbc/build.gradle.kts index 3de2526dd3..3b9c7e2b07 100644 --- a/persistence/relational-jdbc/build.gradle.kts +++ b/persistence/relational-jdbc/build.gradle.kts @@ -49,6 +49,10 @@ dependencies { testImplementation(libs.h2) testImplementation(testFixtures(project(":polaris-core"))) + // Iceberg API for SpiModelConverter tests + testImplementation(platform(libs.iceberg.bom)) + testImplementation("org.apache.iceberg:iceberg-api") + testImplementation(platform(libs.testcontainers.bom)) 
testImplementation("org.testcontainers:testcontainers-junit-jupiter") diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java new file mode 100644 index 0000000000..f89cdf4df3 --- /dev/null +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.relational.jdbc; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.junit.jupiter.api.Test; + +/** Unit tests for {@link SpiModelConverter}. */ +public class SpiModelConverterTest { + + private static final String TEST_REPORT_ID = "report-123"; + private static final String TEST_REALM_ID = "realm-1"; + private static final long TEST_CATALOG_ID = 12345L; + private static final String TEST_CATALOG_NAME = "my_catalog"; + private static final String TEST_NAMESPACE = "db.schema"; + private static final String TEST_TABLE_NAME = "my_table"; + private static final Instant TEST_TIMESTAMP = Instant.ofEpochMilli(1704067200000L); + private static final long TEST_TIMESTAMP_MS = 1704067200000L; + + // === Scan Metrics Test === + + @Test + void testToModelScanReport() { + ScanMetricsRecord record = createTestScanRecord(); + + ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, TEST_REALM_ID); + + assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); + assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); + assertThat(model.getCatalogId()).isEqualTo(String.valueOf(TEST_CATALOG_ID)); + assertThat(model.getCatalogName()).isEqualTo(TEST_CATALOG_NAME); + 
assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE); + assertThat(model.getTableName()).isEqualTo(TEST_TABLE_NAME); + assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); + assertThat(model.getSnapshotId()).isEqualTo(123456789L); + assertThat(model.getSchemaId()).isEqualTo(1); + assertThat(model.getFilterExpression()).isEqualTo("id > 100"); + assertThat(model.getProjectedFieldIds()).isEqualTo("1,2,3"); + assertThat(model.getProjectedFieldNames()).isEqualTo("id,name,value"); + assertThat(model.getResultDataFiles()).isEqualTo(10L); + assertThat(model.getResultDeleteFiles()).isEqualTo(2L); + assertThat(model.getTotalFileSizeBytes()).isEqualTo(1024000L); + assertThat(model.getMetadata()).isEqualTo("{\"custom\":\"value\"}"); + } + + @Test + void testToScanMetricsRecord() { + ModelScanMetricsReport model = createTestModelScanReport(); + + ScanMetricsRecord record = SpiModelConverter.toScanMetricsRecord(model); + + assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); + assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); + assertThat(record.catalogName()).isEqualTo(TEST_CATALOG_NAME); + assertThat(record.tableIdentifier().namespace().toString()).isEqualTo(TEST_NAMESPACE); + assertThat(record.tableIdentifier().name()).isEqualTo(TEST_TABLE_NAME); + assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); + assertThat(record.snapshotId()).isEqualTo(Optional.of(123456789L)); + assertThat(record.schemaId()).isEqualTo(Optional.of(1)); + assertThat(record.filterExpression()).isEqualTo(Optional.of("id > 100")); + assertThat(record.projectedFieldIds()).containsExactly(1, 2, 3); + assertThat(record.projectedFieldNames()).containsExactly("id", "name", "value"); + assertThat(record.resultDataFiles()).isEqualTo(10L); + assertThat(record.metadata()).containsEntry("custom", "value"); + } + + @Test + void testScanRecordRoundTrip() { + ScanMetricsRecord original = createTestScanRecord(); + + ModelScanMetricsReport model = 
SpiModelConverter.toModelScanReport(original, TEST_REALM_ID); + ScanMetricsRecord roundTripped = SpiModelConverter.toScanMetricsRecord(model); + + assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); + assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); + assertThat(roundTripped.catalogName()).isEqualTo(original.catalogName()); + assertThat(roundTripped.tableIdentifier()).isEqualTo(original.tableIdentifier()); + assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); + assertThat(roundTripped.resultDataFiles()).isEqualTo(original.resultDataFiles()); + } + + // === Commit Metrics Test === + + @Test + void testToModelCommitReport() { + CommitMetricsRecord record = createTestCommitRecord(); + + ModelCommitMetricsReport model = SpiModelConverter.toModelCommitReport(record, TEST_REALM_ID); + + assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); + assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); + assertThat(model.getCatalogId()).isEqualTo(String.valueOf(TEST_CATALOG_ID)); + assertThat(model.getCatalogName()).isEqualTo(TEST_CATALOG_NAME); + assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE); + assertThat(model.getTableName()).isEqualTo(TEST_TABLE_NAME); + assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); + assertThat(model.getSnapshotId()).isEqualTo(987654321L); + assertThat(model.getSequenceNumber()).isEqualTo(5L); + assertThat(model.getOperation()).isEqualTo("append"); + assertThat(model.getAddedDataFiles()).isEqualTo(10L); + assertThat(model.getRemovedDataFiles()).isEqualTo(2L); + assertThat(model.getTotalDataFiles()).isEqualTo(100L); + assertThat(model.getAttempts()).isEqualTo(1); + } + + @Test + void testToCommitMetricsRecord() { + ModelCommitMetricsReport model = createTestModelCommitReport(); + + CommitMetricsRecord record = SpiModelConverter.toCommitMetricsRecord(model); + + assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); + 
assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); + assertThat(record.catalogName()).isEqualTo(TEST_CATALOG_NAME); + assertThat(record.tableIdentifier().namespace().toString()).isEqualTo(TEST_NAMESPACE); + assertThat(record.tableIdentifier().name()).isEqualTo(TEST_TABLE_NAME); + assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); + assertThat(record.snapshotId()).isEqualTo(987654321L); + assertThat(record.sequenceNumber()).isEqualTo(Optional.of(5L)); + assertThat(record.operation()).isEqualTo("append"); + assertThat(record.addedDataFiles()).isEqualTo(10L); + assertThat(record.attempts()).isEqualTo(1); + } + + @Test + void testCommitRecordRoundTrip() { + CommitMetricsRecord original = createTestCommitRecord(); + + ModelCommitMetricsReport model = SpiModelConverter.toModelCommitReport(original, TEST_REALM_ID); + CommitMetricsRecord roundTripped = SpiModelConverter.toCommitMetricsRecord(model); + + assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); + assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); + assertThat(roundTripped.catalogName()).isEqualTo(original.catalogName()); + assertThat(roundTripped.tableIdentifier()).isEqualTo(original.tableIdentifier()); + assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); + assertThat(roundTripped.snapshotId()).isEqualTo(original.snapshotId()); + assertThat(roundTripped.operation()).isEqualTo(original.operation()); + } + + // === Edge Cases === + + @Test + void testEmptyNamespace() { + ScanMetricsRecord record = + ScanMetricsRecord.builder() + .reportId(TEST_REPORT_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .tableIdentifier(TableIdentifier.of(Namespace.empty(), TEST_TABLE_NAME)) + .timestamp(TEST_TIMESTAMP) + .resultDataFiles(0L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(0L) + .totalDataManifests(0L) + .totalDeleteManifests(0L) + .scannedDataManifests(0L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + 
.skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(0L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, TEST_REALM_ID); + assertThat(model.getNamespace()).isEmpty(); + + ScanMetricsRecord roundTripped = SpiModelConverter.toScanMetricsRecord(model); + assertThat(roundTripped.tableIdentifier().namespace()).isEqualTo(Namespace.empty()); + } + + @Test + void testNullOptionalFields() { + ScanMetricsRecord record = + ScanMetricsRecord.builder() + .reportId(TEST_REPORT_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .tableIdentifier(TableIdentifier.of(Namespace.of("db"), TEST_TABLE_NAME)) + .timestamp(TEST_TIMESTAMP) + .resultDataFiles(0L) + .resultDeleteFiles(0L) + .totalFileSizeBytes(0L) + .totalDataManifests(0L) + .totalDeleteManifests(0L) + .scannedDataManifests(0L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(0L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, TEST_REALM_ID); + assertThat(model.getSnapshotId()).isNull(); + assertThat(model.getSchemaId()).isNull(); + assertThat(model.getFilterExpression()).isNull(); + assertThat(model.getProjectedFieldIds()).isNull(); + assertThat(model.getProjectedFieldNames()).isNull(); + } + + @Test + void testEmptyMetadata() { + ScanMetricsRecord record = + ScanMetricsRecord.builder() + .reportId(TEST_REPORT_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .tableIdentifier(TableIdentifier.of(Namespace.of("db"), TEST_TABLE_NAME)) + .timestamp(TEST_TIMESTAMP) + .resultDataFiles(0L) + .resultDeleteFiles(0L) + 
.totalFileSizeBytes(0L) + .totalDataManifests(0L) + .totalDeleteManifests(0L) + .scannedDataManifests(0L) + .scannedDeleteManifests(0L) + .skippedDataManifests(0L) + .skippedDeleteManifests(0L) + .skippedDataFiles(0L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(0L) + .equalityDeleteFiles(0L) + .positionalDeleteFiles(0L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(0L) + .build(); + + ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, TEST_REALM_ID); + assertThat(model.getMetadata()).isEqualTo("{}"); + } + + // === Helper Methods === + + private ScanMetricsRecord createTestScanRecord() { + return ScanMetricsRecord.builder() + .reportId(TEST_REPORT_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .tableIdentifier(TableIdentifier.of(Namespace.of("db", "schema"), TEST_TABLE_NAME)) + .timestamp(TEST_TIMESTAMP) + .snapshotId(123456789L) + .schemaId(1) + .filterExpression("id > 100") + .projectedFieldIds(List.of(1, 2, 3)) + .projectedFieldNames(List.of("id", "name", "value")) + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(2048L) + .metadata(Map.of("custom", "value")) + .build(); + } + + private ModelScanMetricsReport createTestModelScanReport() { + return ImmutableModelScanMetricsReport.builder() + .reportId(TEST_REPORT_ID) + .realmId(TEST_REALM_ID) + .catalogId(String.valueOf(TEST_CATALOG_ID)) + .catalogName(TEST_CATALOG_NAME) + .namespace(TEST_NAMESPACE) + .tableName(TEST_TABLE_NAME) + .timestampMs(TEST_TIMESTAMP_MS) + .snapshotId(123456789L) + .schemaId(1) + .filterExpression("id > 100") + .projectedFieldIds("1,2,3") 
+ .projectedFieldNames("id,name,value") + .resultDataFiles(10L) + .resultDeleteFiles(2L) + .totalFileSizeBytes(1024000L) + .totalDataManifests(5L) + .totalDeleteManifests(1L) + .scannedDataManifests(3L) + .scannedDeleteManifests(1L) + .skippedDataManifests(2L) + .skippedDeleteManifests(0L) + .skippedDataFiles(5L) + .skippedDeleteFiles(0L) + .totalPlanningDurationMs(150L) + .equalityDeleteFiles(1L) + .positionalDeleteFiles(1L) + .indexedDeleteFiles(0L) + .totalDeleteFileSizeBytes(2048L) + .metadata("{\"custom\":\"value\"}") + .build(); + } + + private CommitMetricsRecord createTestCommitRecord() { + return CommitMetricsRecord.builder() + .reportId(TEST_REPORT_ID) + .catalogId(TEST_CATALOG_ID) + .catalogName(TEST_CATALOG_NAME) + .tableIdentifier(TableIdentifier.of(Namespace.of("db", "schema"), TEST_TABLE_NAME)) + .timestamp(TEST_TIMESTAMP) + .snapshotId(987654321L) + .sequenceNumber(5L) + .operation("append") + .addedDataFiles(10L) + .removedDataFiles(2L) + .totalDataFiles(100L) + .addedDeleteFiles(1L) + .removedDeleteFiles(0L) + .totalDeleteFiles(5L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(1L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(50L) + .totalRecords(50000L) + .addedFileSizeBytes(102400L) + .removedFileSizeBytes(5120L) + .totalFileSizeBytes(5120000L) + .totalDurationMs(250L) + .attempts(1) + .metadata(Map.of("custom", "value")) + .build(); + } + + private ModelCommitMetricsReport createTestModelCommitReport() { + return ImmutableModelCommitMetricsReport.builder() + .reportId(TEST_REPORT_ID) + .realmId(TEST_REALM_ID) + .catalogId(String.valueOf(TEST_CATALOG_ID)) + .catalogName(TEST_CATALOG_NAME) + .namespace(TEST_NAMESPACE) + .tableName(TEST_TABLE_NAME) + .timestampMs(TEST_TIMESTAMP_MS) + .snapshotId(987654321L) + .sequenceNumber(5L) + .operation("append") + .addedDataFiles(10L) + .removedDataFiles(2L) + .totalDataFiles(100L) + .addedDeleteFiles(1L) + 
.removedDeleteFiles(0L) + .totalDeleteFiles(5L) + .addedEqualityDeleteFiles(0L) + .removedEqualityDeleteFiles(0L) + .addedPositionalDeleteFiles(1L) + .removedPositionalDeleteFiles(0L) + .addedRecords(1000L) + .removedRecords(50L) + .totalRecords(50000L) + .addedFileSizeBytes(102400L) + .removedFileSizeBytes(5120L) + .totalFileSizeBytes(5120000L) + .totalDurationMs(250L) + .attempts(1) + .metadata("{\"custom\":\"value\"}") + .build(); + } +} From a6a67f022e6668a35b9d670c4b13f00dea1dc0f1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 09:14:35 -0800 Subject: [PATCH 40/67] fix: Replace AttributeMap with EventAttributeMap and apply spotless fixes --- .../iceberg/IcebergCatalogAdapter.java | 4 +- ...ebergRestCatalogEventServiceDelegator.java | 102 +++++++++--------- .../MetricsReportingConfiguration.java | 1 - ...oryBufferEventListenerIntegrationTest.java | 6 -- .../listeners/TestPolarisEventListener.java | 1 - 5 files changed, 54 insertions(+), 60 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java index fb54b5572d..a056e1361c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java @@ -71,6 +71,7 @@ import org.apache.polaris.service.catalog.io.StorageAccessConfigProvider; import org.apache.polaris.service.config.ReservedProperties; import org.apache.polaris.service.context.catalog.CallContextCatalogFactory; +import org.apache.polaris.service.events.EventAttributeMap; import org.apache.polaris.service.http.IcebergHttpUtil; import org.apache.polaris.service.http.IfNoneMatch; import org.apache.polaris.service.reporting.PolarisMetricsReporter; @@ -191,7 +192,8 @@ IcebergCatalogHandler 
newHandlerWrapper(SecurityContext securityContext, String reservedProperties, catalogHandlerUtils, externalCatalogFactories, - storageAccessConfigProvider); + storageAccessConfigProvider, + new EventAttributeMap()); } @Override diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index 3a8a35e9e2..486fd3d27c 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -45,7 +45,7 @@ import org.apache.polaris.service.catalog.CatalogPrefixParser; import org.apache.polaris.service.catalog.api.IcebergRestCatalogApiService; import org.apache.polaris.service.catalog.common.CatalogAdapter; -import org.apache.polaris.service.events.AttributeMap; +import org.apache.polaris.service.events.EventAttributeMap; import org.apache.polaris.service.events.EventAttributes; import org.apache.polaris.service.events.PolarisEvent; import org.apache.polaris.service.events.PolarisEventMetadataFactory; @@ -92,7 +92,7 @@ public Response createNamespace( new PolarisEvent( PolarisEventType.BEFORE_CREATE_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.CREATE_NAMESPACE_REQUEST, createNamespaceRequest))); Response resp = @@ -102,7 +102,7 @@ public Response createNamespace( new PolarisEvent( PolarisEventType.AFTER_CREATE_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, createNamespaceResponse.namespace()) .put(EventAttributes.NAMESPACE_PROPERTIES, 
createNamespaceResponse.properties()))); @@ -122,7 +122,7 @@ public Response listNamespaces( new PolarisEvent( PolarisEventType.BEFORE_LIST_NAMESPACES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.PARENT_NAMESPACE_FQN, parent))); Response resp = @@ -131,7 +131,7 @@ public Response listNamespaces( new PolarisEvent( PolarisEventType.AFTER_LIST_NAMESPACES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.PARENT_NAMESPACE_FQN, parent))); return resp; @@ -145,7 +145,7 @@ public Response loadNamespaceMetadata( new PolarisEvent( PolarisEventType.BEFORE_LOAD_NAMESPACE_METADATA, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, decodeNamespace(namespace)))); Response resp = @@ -155,7 +155,7 @@ public Response loadNamespaceMetadata( new PolarisEvent( PolarisEventType.AFTER_LOAD_NAMESPACE_METADATA, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, getNamespaceResponse.namespace()) .put(EventAttributes.NAMESPACE_PROPERTIES, getNamespaceResponse.properties()))); @@ -171,7 +171,7 @@ public Response namespaceExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = delegate.namespaceExists(prefix, namespace, realmContext, securityContext); @@ -179,7 +179,7 @@ public Response namespaceExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, 
catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -193,7 +193,7 @@ public Response dropNamespace( new PolarisEvent( PolarisEventType.BEFORE_DROP_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, decodeNamespace(namespace)))); Response resp = delegate.dropNamespace(prefix, namespace, realmContext, securityContext); @@ -201,7 +201,7 @@ public Response dropNamespace( new PolarisEvent( PolarisEventType.AFTER_DROP_NAMESPACE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE_FQN, namespace))); return resp; @@ -220,7 +220,7 @@ public Response updateProperties( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_NAMESPACE_PROPERTIES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put( @@ -233,7 +233,7 @@ public Response updateProperties( new PolarisEvent( PolarisEventType.AFTER_UPDATE_NAMESPACE_PROPERTIES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put( @@ -256,7 +256,7 @@ public Response createTable( new PolarisEvent( PolarisEventType.BEFORE_CREATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.CREATE_TABLE_REQUEST, createTableRequest) @@ -274,7 +274,7 @@ public Response createTable( new PolarisEvent( PolarisEventType.AFTER_CREATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) 
.put(EventAttributes.TABLE_NAME, createTableRequest.name()) @@ -297,7 +297,7 @@ public Response listTables( new PolarisEvent( PolarisEventType.BEFORE_LIST_TABLES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = @@ -306,7 +306,7 @@ public Response listTables( new PolarisEvent( PolarisEventType.AFTER_LIST_TABLES, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -328,7 +328,7 @@ public Response loadTable( new PolarisEvent( PolarisEventType.BEFORE_LOAD_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -349,7 +349,7 @@ public Response loadTable( new PolarisEvent( PolarisEventType.AFTER_LOAD_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -370,7 +370,7 @@ public Response tableExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -379,7 +379,7 @@ public Response tableExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -400,7 +400,7 @@ public Response dropTable( new PolarisEvent( 
PolarisEventType.BEFORE_DROP_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -411,7 +411,7 @@ public Response dropTable( new PolarisEvent( PolarisEventType.AFTER_DROP_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -432,7 +432,7 @@ public Response registerTable( new PolarisEvent( PolarisEventType.BEFORE_REGISTER_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.REGISTER_TABLE_REQUEST, registerTableRequest))); @@ -443,7 +443,7 @@ public Response registerTable( new PolarisEvent( PolarisEventType.AFTER_REGISTER_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, registerTableRequest.name()) @@ -462,7 +462,7 @@ public Response renameTable( new PolarisEvent( PolarisEventType.BEFORE_RENAME_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); Response resp = delegate.renameTable(prefix, renameTableRequest, realmContext, securityContext); @@ -470,7 +470,7 @@ public Response renameTable( new PolarisEvent( PolarisEventType.AFTER_RENAME_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); return resp; @@ -490,7 +490,7 @@ public Response 
updateTable( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -502,7 +502,7 @@ public Response updateTable( new PolarisEvent( PolarisEventType.AFTER_UPDATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -524,7 +524,7 @@ public Response createView( new PolarisEvent( PolarisEventType.BEFORE_CREATE_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.CREATE_VIEW_REQUEST, createViewRequest))); @@ -534,7 +534,7 @@ public Response createView( new PolarisEvent( PolarisEventType.AFTER_CREATE_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, createViewRequest.name()) @@ -556,7 +556,7 @@ public Response listViews( new PolarisEvent( PolarisEventType.BEFORE_LIST_VIEWS, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); Response resp = @@ -565,7 +565,7 @@ public Response listViews( new PolarisEvent( PolarisEventType.AFTER_LIST_VIEWS, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj))); return resp; @@ -584,7 +584,7 @@ public Response loadCredentials( new PolarisEvent( PolarisEventType.BEFORE_LOAD_CREDENTIALS, eventMetadataFactory.create(), - new 
AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -594,7 +594,7 @@ public Response loadCredentials( new PolarisEvent( PolarisEventType.AFTER_LOAD_CREDENTIALS, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); @@ -614,7 +614,7 @@ public Response loadView( new PolarisEvent( PolarisEventType.BEFORE_LOAD_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -623,7 +623,7 @@ public Response loadView( new PolarisEvent( PolarisEventType.AFTER_LOAD_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -644,7 +644,7 @@ public Response viewExists( new PolarisEvent( PolarisEventType.BEFORE_CHECK_EXISTS_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -653,7 +653,7 @@ public Response viewExists( new PolarisEvent( PolarisEventType.AFTER_CHECK_EXISTS_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -673,7 +673,7 @@ public Response dropView( new PolarisEvent( PolarisEventType.BEFORE_DROP_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) 
.put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -682,7 +682,7 @@ public Response dropView( new PolarisEvent( PolarisEventType.AFTER_DROP_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view))); @@ -700,7 +700,7 @@ public Response renameView( new PolarisEvent( PolarisEventType.BEFORE_RENAME_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); Response resp = delegate.renameView(prefix, renameTableRequest, realmContext, securityContext); @@ -708,7 +708,7 @@ public Response renameView( new PolarisEvent( PolarisEventType.AFTER_RENAME_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.RENAME_TABLE_REQUEST, renameTableRequest))); return resp; @@ -728,7 +728,7 @@ public Response replaceView( new PolarisEvent( PolarisEventType.BEFORE_REPLACE_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -740,7 +740,7 @@ public Response replaceView( new PolarisEvent( PolarisEventType.AFTER_REPLACE_VIEW, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.VIEW_NAME, view) @@ -760,7 +760,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.BEFORE_COMMIT_TRANSACTION, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) 
.put(EventAttributes.COMMIT_TRANSACTION_REQUEST, commitTransactionRequest))); for (UpdateTableRequest req : commitTransactionRequest.tableChanges()) { @@ -768,7 +768,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.BEFORE_UPDATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, req.identifier().namespace()) .put(EventAttributes.TABLE_NAME, req.identifier().name()) @@ -780,7 +780,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.AFTER_COMMIT_TRANSACTION, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.COMMIT_TRANSACTION_REQUEST, commitTransactionRequest))); for (UpdateTableRequest req : commitTransactionRequest.tableChanges()) { @@ -788,7 +788,7 @@ public Response commitTransaction( new PolarisEvent( PolarisEventType.AFTER_UPDATE_TABLE, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, req.identifier().namespace()) .put(EventAttributes.TABLE_NAME, req.identifier().name()) @@ -823,7 +823,7 @@ public Response sendNotification( new PolarisEvent( PolarisEventType.BEFORE_SEND_NOTIFICATION, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) @@ -835,7 +835,7 @@ public Response sendNotification( new PolarisEvent( PolarisEventType.AFTER_SEND_NOTIFICATION, eventMetadataFactory.create(), - new AttributeMap() + new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table))); diff --git 
a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java index 6a0846969c..3d60302ab3 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/MetricsReportingConfiguration.java @@ -26,4 +26,3 @@ public interface MetricsReportingConfiguration { @WithDefault("default") String type(); } - diff --git a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java index c3709ea77d..bf2b5e00fc 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/events/listeners/inmemory/InMemoryBufferEventListenerIntegrationTest.java @@ -49,14 +49,8 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SessionCatalog; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.metrics.ImmutableScanReport; -import org.apache.iceberg.metrics.ScanMetrics; -import org.apache.iceberg.metrics.ScanMetricsResult; -import org.apache.iceberg.metrics.ScanReport; import org.apache.iceberg.rest.RESTSessionCatalog; import org.apache.iceberg.rest.auth.OAuth2Properties; -import org.apache.iceberg.rest.requests.ReportMetricsRequest; import org.apache.iceberg.types.Types; import org.apache.polaris.core.admin.model.Catalog; import org.apache.polaris.core.admin.model.CatalogProperties; diff --git 
a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java index 11d8a4dae8..e23a7a9264 100644 --- a/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/events/listeners/TestPolarisEventListener.java @@ -43,5 +43,4 @@ public PolarisEvent getLatest(PolarisEventType type) { } return latest; } - } From fd45762fa1e13217a42985aa61447d28512f4472 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 09:14:43 -0800 Subject: [PATCH 41/67] feat: Add PersistingMetricsReporter to connect REST API to MetricsPersistence SPI This new reporter implementation: - Persists Iceberg scan and commit metrics to the MetricsPersistence backend - Looks up catalog entity by name to obtain catalog ID - Uses MetricsRecordConverter to convert Iceberg reports to SPI records - Can be enabled by setting polaris.iceberg-metrics.reporting.type=persisting --- .../reporting/PersistingMetricsReporter.java | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java new file mode 100644 index 0000000000..f3703e3c8c --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import java.time.Instant; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.context.CallContext; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.entity.PolarisEntitySubType; +import org.apache.polaris.core.entity.PolarisEntityType; +import org.apache.polaris.core.metrics.iceberg.MetricsRecordConverter; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.core.persistence.PolarisMetaStoreManager; +import org.apache.polaris.core.persistence.dao.entity.EntityResult; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of {@link PolarisMetricsReporter} that persists metrics to the configured {@link + * MetricsPersistence} backend. + * + *

    This reporter is selected when {@code polaris.iceberg-metrics.reporting.type} is set to {@code + * "persisting"}. + * + *

    The reporter looks up the catalog entity by name to obtain the catalog ID, then uses {@link + * MetricsRecordConverter} to convert Iceberg metrics reports to SPI records before persisting them. + * + * @see PolarisMetricsReporter + * @see MetricsPersistence + * @see MetricsRecordConverter + */ +@RequestScoped +@Identifier("persisting") +public class PersistingMetricsReporter implements PolarisMetricsReporter { + private static final Logger LOGGER = LoggerFactory.getLogger(PersistingMetricsReporter.class); + + private final RealmContext realmContext; + private final CallContext callContext; + private final PolarisMetaStoreManager metaStoreManager; + private final MetaStoreManagerFactory metaStoreManagerFactory; + + @Inject + public PersistingMetricsReporter( + RealmContext realmContext, + CallContext callContext, + PolarisMetaStoreManager metaStoreManager, + MetaStoreManagerFactory metaStoreManagerFactory) { + this.realmContext = realmContext; + this.callContext = callContext; + this.metaStoreManager = metaStoreManager; + this.metaStoreManagerFactory = metaStoreManagerFactory; + } + + @Override + public void reportMetric( + String catalogName, + TableIdentifier table, + MetricsReport metricsReport, + Instant receivedTimestamp) { + + // Get the MetricsPersistence implementation for this realm + MetricsPersistence persistence = + metaStoreManagerFactory.getOrCreateMetricsPersistence(realmContext); + + // Look up the catalog entity to get the catalog ID + EntityResult catalogResult = + metaStoreManager.readEntityByName( + callContext.getPolarisCallContext(), + null, // catalogPath is null for top-level entities + PolarisEntityType.CATALOG, + PolarisEntitySubType.ANY_SUBTYPE, + catalogName); + + if (!catalogResult.isSuccess()) { + LOGGER.warn( + "Failed to find catalog '{}' for metrics persistence. 
Metrics will not be stored.", + catalogName); + return; + } + + long catalogId = catalogResult.getEntity().getId(); + + if (metricsReport instanceof ScanReport scanReport) { + ScanMetricsRecord record = + MetricsRecordConverter.forScanReport(scanReport) + .catalogId(catalogId) + .catalogName(catalogName) + .tableIdentifier(table) + .build(); + persistence.writeScanReport(record); + LOGGER.debug( + "Persisted scan metrics for {}.{} (reportId={})", catalogName, table, record.reportId()); + } else if (metricsReport instanceof CommitReport commitReport) { + CommitMetricsRecord record = + MetricsRecordConverter.forCommitReport(commitReport) + .catalogId(catalogId) + .catalogName(catalogName) + .tableIdentifier(table) + .build(); + persistence.writeCommitReport(record); + LOGGER.debug( + "Persisted commit metrics for {}.{} (reportId={})", + catalogName, + table, + record.reportId()); + } else { + LOGGER.warn( + "Unknown metrics report type: {}. Metrics will not be stored.", + metricsReport.getClass().getName()); + } + } +} From 3e4b3005fcdd55aa5c7492a2243d9ea4217fa014 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 09:25:09 -0800 Subject: [PATCH 42/67] test: Add unit tests for PersistingMetricsReporter --- .../PersistingMetricsReporterTest.java | 232 ++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java new file mode 100644 index 0000000000..c5c4a2049a --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.reporting; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.time.Instant; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.metrics.CommitMetrics; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.ImmutableCommitReport; +import org.apache.iceberg.metrics.ImmutableScanReport; +import org.apache.iceberg.metrics.MetricsReport; +import org.apache.iceberg.metrics.ScanMetrics; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.polaris.core.PolarisCallContext; +import org.apache.polaris.core.context.CallContext; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.entity.PolarisBaseEntity; +import 
org.apache.polaris.core.entity.PolarisEntitySubType; +import org.apache.polaris.core.entity.PolarisEntityType; +import org.apache.polaris.core.persistence.MetaStoreManagerFactory; +import org.apache.polaris.core.persistence.PolarisMetaStoreManager; +import org.apache.polaris.core.persistence.dao.entity.BaseResult; +import org.apache.polaris.core.persistence.dao.entity.EntityResult; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +public class PersistingMetricsReporterTest { + + private static final String CATALOG_NAME = "test-catalog"; + private static final long CATALOG_ID = 12345L; + private static final TableIdentifier TABLE_IDENTIFIER = + TableIdentifier.of(Namespace.of("db", "schema"), "test_table"); + + private RealmContext realmContext; + private CallContext callContext; + private PolarisCallContext polarisCallContext; + private PolarisMetaStoreManager metaStoreManager; + private MetaStoreManagerFactory metaStoreManagerFactory; + private MetricsPersistence metricsPersistence; + private PersistingMetricsReporter reporter; + + @BeforeEach + void setUp() { + realmContext = () -> "test-realm"; + polarisCallContext = mock(PolarisCallContext.class); + callContext = mock(CallContext.class); + when(callContext.getPolarisCallContext()).thenReturn(polarisCallContext); + + metaStoreManager = mock(PolarisMetaStoreManager.class); + metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); + metricsPersistence = mock(MetricsPersistence.class); + + when(metaStoreManagerFactory.getOrCreateMetricsPersistence(realmContext)) + .thenReturn(metricsPersistence); + + reporter = + new PersistingMetricsReporter( + realmContext, callContext, metaStoreManager, metaStoreManagerFactory); + } + + @Test + 
void testReportScanMetrics() { + // Setup catalog lookup + PolarisBaseEntity catalogEntity = createCatalogEntity(CATALOG_ID, CATALOG_NAME); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + eq(null), + eq(PolarisEntityType.CATALOG), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(CATALOG_NAME))) + .thenReturn(new EntityResult(catalogEntity)); + + // Create a scan report + ScanReport scanReport = createScanReport(); + + // Call the reporter + reporter.reportMetric(CATALOG_NAME, TABLE_IDENTIFIER, scanReport, Instant.now()); + + // Verify persistence was called with correct record + ArgumentCaptor captor = ArgumentCaptor.forClass(ScanMetricsRecord.class); + verify(metricsPersistence).writeScanReport(captor.capture()); + + ScanMetricsRecord record = captor.getValue(); + assertThat(record.catalogId()).isEqualTo(CATALOG_ID); + assertThat(record.catalogName()).isEqualTo(CATALOG_NAME); + assertThat(record.tableIdentifier()).isEqualTo(TABLE_IDENTIFIER); + assertThat(record.reportId()).isNotNull(); + } + + @Test + void testReportCommitMetrics() { + // Setup catalog lookup + PolarisBaseEntity catalogEntity = createCatalogEntity(CATALOG_ID, CATALOG_NAME); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + eq(null), + eq(PolarisEntityType.CATALOG), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(CATALOG_NAME))) + .thenReturn(new EntityResult(catalogEntity)); + + // Create a commit report + CommitReport commitReport = createCommitReport(); + + // Call the reporter + reporter.reportMetric(CATALOG_NAME, TABLE_IDENTIFIER, commitReport, Instant.now()); + + // Verify persistence was called with correct record + ArgumentCaptor captor = ArgumentCaptor.forClass(CommitMetricsRecord.class); + verify(metricsPersistence).writeCommitReport(captor.capture()); + + CommitMetricsRecord record = captor.getValue(); + assertThat(record.catalogId()).isEqualTo(CATALOG_ID); + assertThat(record.catalogName()).isEqualTo(CATALOG_NAME); + 
assertThat(record.tableIdentifier()).isEqualTo(TABLE_IDENTIFIER); + assertThat(record.reportId()).isNotNull(); + } + + @Test + void testCatalogNotFound() { + // Setup catalog lookup to return entity not found + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + eq(null), + eq(PolarisEntityType.CATALOG), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(CATALOG_NAME))) + .thenReturn( + new EntityResult(BaseResult.ReturnStatus.ENTITY_NOT_FOUND, "Catalog not found")); + + ScanReport scanReport = createScanReport(); + + // Call the reporter - should not throw + reporter.reportMetric(CATALOG_NAME, TABLE_IDENTIFIER, scanReport, Instant.now()); + + // Verify persistence was NOT called since catalog was not found + verify(metricsPersistence, never()).writeScanReport(any()); + verify(metricsPersistence, never()).writeCommitReport(any()); + } + + @Test + void testUnknownReportType() { + // Setup catalog lookup + PolarisBaseEntity catalogEntity = createCatalogEntity(CATALOG_ID, CATALOG_NAME); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + eq(null), + eq(PolarisEntityType.CATALOG), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(CATALOG_NAME))) + .thenReturn(new EntityResult(catalogEntity)); + + // Create an unknown report type (using a mock) + MetricsReport unknownReport = mock(MetricsReport.class); + + // Call the reporter - should not throw + reporter.reportMetric(CATALOG_NAME, TABLE_IDENTIFIER, unknownReport, Instant.now()); + + // Verify persistence was NOT called since report type is unknown + verify(metricsPersistence, never()).writeScanReport(any()); + verify(metricsPersistence, never()).writeCommitReport(any()); + } + + private PolarisBaseEntity createCatalogEntity(long id, String name) { + return new PolarisBaseEntity.Builder() + .catalogId(0L) + .id(id) + .parentId(0L) + .typeCode(PolarisEntityType.CATALOG.getCode()) + .subTypeCode(PolarisEntitySubType.NULL_SUBTYPE.getCode()) + .name(name) + .entityVersion(1) + .build(); + } + + 
private ScanReport createScanReport() { + return ImmutableScanReport.builder() + .tableName("db.schema.test_table") + .snapshotId(123456789L) + .schemaId(1) + .filter(Expressions.alwaysTrue()) + .scanMetrics(ScanMetricsResult.fromScanMetrics(ScanMetrics.noop())) + .build(); + } + + private CommitReport createCommitReport() { + CommitMetrics commitMetrics = + CommitMetrics.of(new org.apache.iceberg.metrics.DefaultMetricsContext()); + CommitMetricsResult metricsResult = CommitMetricsResult.from(commitMetrics, Map.of()); + + return ImmutableCommitReport.builder() + .tableName("db.schema.test_table") + .snapshotId(987654321L) + .sequenceNumber(5L) + .operation("append") + .commitMetrics(metricsResult) + .build(); + } +} From a438cd709c098e758dc68f6c99bd5a412655f264 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 10:47:02 -0800 Subject: [PATCH 43/67] fix: Restore eventAttributeMap for proper TABLE_METADATA event handling Restored the request-scoped EventAttributeMap that was inadvertently removed. This fixes the CommitTransactionEventTest by ensuring TABLE_METADATAS populated by IcebergCatalogHandler is accessible to IcebergRestCatalogEventServiceDelegator. 
--- .../iceberg/IcebergCatalogAdapter.java | 7 ++++-- ...ebergRestCatalogEventServiceDelegator.java | 23 +++++++++++++++---- .../apache/polaris/service/TestServices.java | 8 +++++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java index a056e1361c..59629a8267 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalogAdapter.java @@ -108,6 +108,7 @@ public class IcebergCatalogAdapter private final StorageAccessConfigProvider storageAccessConfigProvider; private final PolarisMetricsReporter metricsReporter; private final Clock clock; + private final EventAttributeMap eventAttributeMap; @Inject public IcebergCatalogAdapter( @@ -126,7 +127,8 @@ public IcebergCatalogAdapter( @Any Instance externalCatalogFactories, StorageAccessConfigProvider storageAccessConfigProvider, PolarisMetricsReporter metricsReporter, - Clock clock) { + Clock clock, + EventAttributeMap eventAttributeMap) { this.diagnostics = diagnostics; this.realmContext = realmContext; this.callContext = callContext; @@ -144,6 +146,7 @@ public IcebergCatalogAdapter( this.storageAccessConfigProvider = storageAccessConfigProvider; this.metricsReporter = metricsReporter; this.clock = clock; + this.eventAttributeMap = eventAttributeMap; } /** @@ -193,7 +196,7 @@ IcebergCatalogHandler newHandlerWrapper(SecurityContext securityContext, String catalogHandlerUtils, externalCatalogFactories, storageAccessConfigProvider, - new EventAttributeMap()); + eventAttributeMap); } @Override diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java 
b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index 486fd3d27c..950e218be6 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -26,6 +26,8 @@ import jakarta.inject.Inject; import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.SecurityContext; +import java.util.List; +import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.rest.requests.CommitTransactionRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; @@ -64,6 +66,7 @@ public class IcebergRestCatalogEventServiceDelegator @Inject PolarisEventListener polarisEventListener; @Inject PolarisEventMetadataFactory eventMetadataFactory; @Inject CatalogPrefixParser prefixParser; + @Inject EventAttributeMap eventAttributeMap; // Constructor for testing - allows manual dependency injection @VisibleForTesting @@ -71,11 +74,13 @@ public IcebergRestCatalogEventServiceDelegator( IcebergCatalogAdapter delegate, PolarisEventListener polarisEventListener, PolarisEventMetadataFactory eventMetadataFactory, - CatalogPrefixParser prefixParser) { + CatalogPrefixParser prefixParser, + EventAttributeMap eventAttributeMap) { this.delegate = delegate; this.polarisEventListener = polarisEventListener; this.eventMetadataFactory = eventMetadataFactory; this.prefixParser = prefixParser; + this.eventAttributeMap = eventAttributeMap; } // Default constructor for CDI @@ -507,7 +512,9 @@ public Response updateTable( .put(EventAttributes.NAMESPACE, namespaceObj) .put(EventAttributes.TABLE_NAME, table) .put(EventAttributes.UPDATE_TABLE_REQUEST, commitTableRequest) - .put(EventAttributes.LOAD_TABLE_RESPONSE, (LoadTableResponse) resp.getEntity()))); + .put( + EventAttributes.TABLE_METADATA, + 
((LoadTableResponse) resp.getEntity()).tableMetadata()))); return resp; } @@ -783,7 +790,14 @@ public Response commitTransaction( new EventAttributeMap() .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.COMMIT_TRANSACTION_REQUEST, commitTransactionRequest))); - for (UpdateTableRequest req : commitTransactionRequest.tableChanges()) { + List tableMetadataList = + eventAttributeMap.getRequired(EventAttributes.TABLE_METADATAS); + for (int i = 0; i < commitTransactionRequest.tableChanges().size(); i++) { + UpdateTableRequest req = commitTransactionRequest.tableChanges().get(i); + TableMetadata tableMetadata = + tableMetadataList != null && i < tableMetadataList.size() + ? tableMetadataList.get(i) + : null; polarisEventListener.onEvent( new PolarisEvent( PolarisEventType.AFTER_UPDATE_TABLE, @@ -792,7 +806,8 @@ public Response commitTransaction( .put(EventAttributes.CATALOG_NAME, catalogName) .put(EventAttributes.NAMESPACE, req.identifier().namespace()) .put(EventAttributes.TABLE_NAME, req.identifier().name()) - .put(EventAttributes.UPDATE_TABLE_REQUEST, req))); + .put(EventAttributes.UPDATE_TABLE_REQUEST, req) + .put(EventAttributes.TABLE_METADATA, tableMetadata))); } return resp; } diff --git a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java index 30303121e1..7b4e5c2dda 100644 --- a/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java +++ b/runtime/service/src/testFixtures/java/org/apache/polaris/service/TestServices.java @@ -83,6 +83,7 @@ import org.apache.polaris.service.context.catalog.PolarisCallContextCatalogFactory; import org.apache.polaris.service.credentials.DefaultPolarisCredentialManager; import org.apache.polaris.service.credentials.connection.SigV4ConnectionCredentialVendor; +import org.apache.polaris.service.events.EventAttributeMap; import 
org.apache.polaris.service.events.PolarisEventMetadata; import org.apache.polaris.service.events.PolarisEventMetadataFactory; import org.apache.polaris.service.events.listeners.PolarisEventListener; @@ -334,6 +335,7 @@ public String getAuthenticationScheme() { Mockito.when(externalCatalogFactory.select(any())).thenReturn(externalCatalogFactory); Mockito.when(externalCatalogFactory.isUnsatisfied()).thenReturn(true); + EventAttributeMap eventAttributeMap = new EventAttributeMap(); IcebergCatalogAdapter catalogService = new IcebergCatalogAdapter( diagnostics, @@ -351,7 +353,8 @@ public String getAuthenticationScheme() { externalCatalogFactory, storageAccessConfigProvider, new DefaultMetricsReporter(), - Clock.systemUTC()); + Clock.systemUTC(), + eventAttributeMap); // Optionally wrap with event delegator IcebergRestCatalogApiService finalRestCatalogService = catalogService; @@ -362,7 +365,8 @@ public String getAuthenticationScheme() { catalogService, polarisEventListener, eventMetadataFactory, - new DefaultCatalogPrefixParser()); + new DefaultCatalogPrefixParser(), + eventAttributeMap); finalRestConfigurationService = new IcebergRestConfigurationEventServiceDelegator( catalogService, polarisEventListener, eventMetadataFactory); From a2c9f5e7d11e795525a1f873f485c1cd73c1d49e Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:17:53 -0800 Subject: [PATCH 44/67] refactor: Update JDBC layer to match SPI changes - Change catalogId from String to long in all JDBC models - Remove catalogName from all JDBC models and converters - Update JdbcBasePersistenceImpl to query by catalogId (long) instead of catalogName - Update JdbcMetricsPersistence to use catalogId and List namespace - Update SpiModelConverter to handle namespace as List - Update MetricsReportConverter signatures to use long catalogId - Remove catalogName from PersistingMetricsReporter builder calls --- .../jdbc/JdbcBasePersistenceImpl.java | 20 ++++----- .../jdbc/JdbcMetricsPersistence.java 
| 18 ++++++-- .../relational/jdbc/SpiModelConverter.java | 42 +++++++++---------- .../jdbc/models/MetricsReportConverter.java | 10 +---- .../jdbc/models/ModelCommitMetricsReport.java | 13 ++---- .../ModelCommitMetricsReportConverter.java | 3 +- .../jdbc/models/ModelScanMetricsReport.java | 13 ++---- .../ModelScanMetricsReportConverter.java | 3 +- .../reporting/PersistingMetricsReporter.java | 2 - 9 files changed, 53 insertions(+), 71 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index b39c62c321..076b9f7f1c 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -537,8 +537,8 @@ private List loadCommitMetricsReportRoles( * *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. * - * @param catalogName the catalog name - * @param namespace the namespace + * @param catalogId the catalog entity ID + * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound @@ -548,7 +548,7 @@ private List loadCommitMetricsReportRoles( */ @Nonnull public List queryScanMetricsReports( - @Nonnull String catalogName, + long catalogId, @Nonnull String namespace, @Nonnull String tableName, @Nullable Long startTimeMs, @@ -559,8 +559,8 @@ public List queryScanMetricsReports( } try { StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_name = ? AND namespace = ? AND table_name = ?"); - List values = new ArrayList<>(List.of(realmId, catalogName, namespace, tableName)); + whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_name = ?"); + List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableName)); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); @@ -597,8 +597,8 @@ public List queryScanMetricsReports( * *

    This method requires schema version 4 or higher. On older schemas, returns an empty list. * - * @param catalogName the catalog name - * @param namespace the namespace + * @param catalogId the catalog entity ID + * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound @@ -608,7 +608,7 @@ public List queryScanMetricsReports( */ @Nonnull public List queryCommitMetricsReports( - @Nonnull String catalogName, + long catalogId, @Nonnull String namespace, @Nonnull String tableName, @Nullable Long startTimeMs, @@ -618,10 +618,10 @@ public List queryCommitMetricsReports( return Collections.emptyList(); } try { - List values = new ArrayList<>(List.of(realmId, catalogName, namespace, tableName)); + List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableName)); StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_name = ? AND namespace = ? AND table_name = ?"); + whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? 
AND table_name = ?"); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java index 883c4b38b4..70daa82cf0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -79,14 +79,19 @@ public Page queryScanReports( return Page.fromItems(List.of()); } + // catalogId is required for queries + if (criteria.catalogId().isEmpty()) { + return Page.fromItems(List.of()); + } + int limit = pageToken.pageSize().orElse(100); Long startTimeMs = criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); List models = jdbcPersistence.queryScanMetricsReports( - criteria.catalogName().orElse(""), - criteria.namespace().orElse(""), + criteria.catalogId().getAsLong(), + String.join(".", criteria.namespace()), criteria.tableName().orElse(""), startTimeMs, endTimeMs, @@ -106,14 +111,19 @@ public Page queryCommitReports( return Page.fromItems(List.of()); } + // catalogId is required for queries + if (criteria.catalogId().isEmpty()) { + return Page.fromItems(List.of()); + } + int limit = pageToken.pageSize().orElse(100); Long startTimeMs = criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); List models = jdbcPersistence.queryCommitMetricsReports( - criteria.catalogName().orElse(""), - criteria.namespace().orElse(""), + criteria.catalogId().getAsLong(), + String.join(".", criteria.namespace()), criteria.tableName().orElse(""), startTimeMs, endTimeMs, diff --git 
a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java index 2ac7e89fb8..44d9c585f0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java @@ -22,13 +22,12 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import java.time.Instant; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; @@ -46,8 +45,8 @@ *

    Key conversions handled: * *

      - *
    • catalogId: long (SPI) ↔ String (Model) - *
    • tableIdentifier: TableIdentifier (SPI) ↔ separate namespace/tableName strings (Model) + *
    • catalogId: long (SPI) ↔ long (Model) + *
    • namespace: List<String> (SPI) ↔ dot-separated string (Model) *
    • timestamp: Instant (SPI) ↔ long milliseconds (Model) *
    • metadata: Map<String, String> (SPI) ↔ JSON string (Model) *
    • projectedFieldIds/Names: List (SPI) ↔ comma-separated string (Model) @@ -72,10 +71,9 @@ public static ModelScanMetricsReport toModelScanReport(ScanMetricsRecord record, return ImmutableModelScanMetricsReport.builder() .reportId(record.reportId()) .realmId(realmId) - .catalogId(String.valueOf(record.catalogId())) - .catalogName(record.catalogName()) - .namespace(record.tableIdentifier().namespace().toString()) - .tableName(record.tableIdentifier().name()) + .catalogId(record.catalogId()) + .namespace(String.join(".", record.namespace())) + .tableName(record.tableName()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId().orElse(null)) .schemaId(record.schemaId().orElse(null)) @@ -114,10 +112,9 @@ public static ModelCommitMetricsReport toModelCommitReport( return ImmutableModelCommitMetricsReport.builder() .reportId(record.reportId()) .realmId(realmId) - .catalogId(String.valueOf(record.catalogId())) - .catalogName(record.catalogName()) - .namespace(record.tableIdentifier().namespace().toString()) - .tableName(record.tableIdentifier().name()) + .catalogId(record.catalogId()) + .namespace(String.join(".", record.namespace())) + .tableName(record.tableName()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId()) .sequenceNumber(record.sequenceNumber().orElse(null)) @@ -153,9 +150,9 @@ public static ModelCommitMetricsReport toModelCommitReport( public static ScanMetricsRecord toScanMetricsRecord(ModelScanMetricsReport model) { return ScanMetricsRecord.builder() .reportId(model.getReportId()) - .catalogId(Long.parseLong(model.getCatalogId())) - .catalogName(model.getCatalogName()) - .tableIdentifier(parseTableIdentifier(model.getNamespace(), model.getTableName())) + .catalogId(model.getCatalogId()) + .namespace(parseNamespace(model.getNamespace())) + .tableName(model.getTableName()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(Optional.ofNullable(model.getSnapshotId())) 
.schemaId(Optional.ofNullable(model.getSchemaId())) @@ -191,9 +188,9 @@ public static ScanMetricsRecord toScanMetricsRecord(ModelScanMetricsReport model public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport model) { return CommitMetricsRecord.builder() .reportId(model.getReportId()) - .catalogId(Long.parseLong(model.getCatalogId())) - .catalogName(model.getCatalogName()) - .tableIdentifier(parseTableIdentifier(model.getNamespace(), model.getTableName())) + .catalogId(model.getCatalogId()) + .namespace(parseNamespace(model.getNamespace())) + .tableName(model.getTableName()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(model.getSnapshotId()) .sequenceNumber(Optional.ofNullable(model.getSequenceNumber())) @@ -225,13 +222,12 @@ public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport // === Helper Methods === - private static TableIdentifier parseTableIdentifier(String namespace, String tableName) { + private static List parseNamespace(String namespace) { if (namespace == null || namespace.isEmpty()) { - return TableIdentifier.of(Namespace.empty(), tableName); + return Collections.emptyList(); } - // Namespace.toString() uses "." as separator, so we split by "." 
- String[] levels = namespace.split("\\."); - return TableIdentifier.of(Namespace.of(levels), tableName); + // Namespace is stored as dot-separated string + return Arrays.asList(namespace.split("\\.")); } private static String toCommaSeparated(List list) { diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java index c9660c2e84..dcdf9b925a 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java @@ -49,7 +49,6 @@ private MetricsReportConverter() { * @param scanReport the Iceberg scan report * @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID - * @param catalogName the catalog name * @param namespace the namespace (dot-separated) * @param principalName the principal who initiated the scan (optional) * @param requestId the request ID (optional) @@ -60,8 +59,7 @@ private MetricsReportConverter() { public static ModelScanMetricsReport fromScanReport( ScanReport scanReport, String realmId, - String catalogId, - String catalogName, + long catalogId, String namespace, @Nullable String principalName, @Nullable String requestId, @@ -78,7 +76,6 @@ public static ModelScanMetricsReport fromScanReport( .reportId(reportId) .realmId(realmId) .catalogId(catalogId) - .catalogName(catalogName) .namespace(namespace) .tableName(scanReport.tableName()) .timestampMs(timestampMs) @@ -146,7 +143,6 @@ public static ModelScanMetricsReport fromScanReport( * @param commitReport the Iceberg commit report * @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID - * @param catalogName the catalog name * @param namespace the 
namespace (dot-separated) * @param principalName the principal who initiated the commit (optional) * @param requestId the request ID (optional) @@ -157,8 +153,7 @@ public static ModelScanMetricsReport fromScanReport( public static ModelCommitMetricsReport fromCommitReport( CommitReport commitReport, String realmId, - String catalogId, - String catalogName, + long catalogId, String namespace, @Nullable String principalName, @Nullable String requestId, @@ -175,7 +170,6 @@ public static ModelCommitMetricsReport fromCommitReport( .reportId(reportId) .realmId(realmId) .catalogId(catalogId) - .catalogName(catalogName) .namespace(namespace) .tableName(commitReport.tableName()) .timestampMs(timestampMs) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index 57026cea0c..ca23333c1d 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -38,7 +38,6 @@ public interface ModelCommitMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REPORT_ID, getReportId()); map.put(REALM_ID, getRealmId()); map.put(CATALOG_ID, getCatalogId()); - map.put(CATALOG_NAME, getCatalogName()); map.put(NAMESPACE, getNamespace()); map.put(TABLE_NAME_COL, getTableName()); map.put(TIMESTAMP_MS, getTimestampMs()); @@ -281,8 +275,7 @@ default Map toMap(DatabaseType databaseType) { ImmutableModelCommitMetricsReport.builder() .reportId("") .realmId("") - .catalogId("") - .catalogName("") + .catalogId(0L) .namespace("") .tableName("") .timestampMs(0L) diff --git 
a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index 16417e4afe..9691e2f43f 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -34,8 +34,7 @@ public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException return ImmutableModelCommitMetricsReport.builder() .reportId(rs.getString(ModelCommitMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) - .catalogId(rs.getString(ModelCommitMetricsReport.CATALOG_ID)) - .catalogName(rs.getString(ModelCommitMetricsReport.CATALOG_NAME)) + .catalogId(rs.getLong(ModelCommitMetricsReport.CATALOG_ID)) .namespace(rs.getString(ModelCommitMetricsReport.NAMESPACE)) .tableName(rs.getString(ModelCommitMetricsReport.TABLE_NAME_COL)) .timestampMs(rs.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index 3d28905f14..90ad5841dd 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -38,7 +38,6 @@ public interface ModelScanMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REPORT_ID, getReportId()); map.put(REALM_ID, getRealmId()); 
map.put(CATALOG_ID, getCatalogId()); - map.put(CATALOG_NAME, getCatalogName()); map.put(NAMESPACE, getNamespace()); map.put(TABLE_NAME_COL, getTableName()); map.put(TIMESTAMP_MS, getTimestampMs()); @@ -285,8 +279,7 @@ default Map toMap(DatabaseType databaseType) { ImmutableModelScanMetricsReport.builder() .reportId("") .realmId("") - .catalogId("") - .catalogName("") + .catalogId(0L) .namespace("") .tableName("") .timestampMs(0L) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index e754b3c85d..5449ae4be0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -34,8 +34,7 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { return ImmutableModelScanMetricsReport.builder() .reportId(rs.getString(ModelScanMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) - .catalogId(rs.getString(ModelScanMetricsReport.CATALOG_ID)) - .catalogName(rs.getString(ModelScanMetricsReport.CATALOG_NAME)) + .catalogId(rs.getLong(ModelScanMetricsReport.CATALOG_ID)) .namespace(rs.getString(ModelScanMetricsReport.NAMESPACE)) .tableName(rs.getString(ModelScanMetricsReport.TABLE_NAME_COL)) .timestampMs(rs.getLong(ModelScanMetricsReport.TIMESTAMP_MS)) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java index f3703e3c8c..5a4cec6de5 100644 --- 
a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -109,7 +109,6 @@ public void reportMetric( ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) .catalogId(catalogId) - .catalogName(catalogName) .tableIdentifier(table) .build(); persistence.writeScanReport(record); @@ -119,7 +118,6 @@ public void reportMetric( CommitMetricsRecord record = MetricsRecordConverter.forCommitReport(commitReport) .catalogId(catalogId) - .catalogName(catalogName) .tableIdentifier(table) .build(); persistence.writeCommitReport(record); From c571d295607985e57df99aaefb2fb29b09418f6f Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:50:32 -0800 Subject: [PATCH 45/67] refactor: Update JDBC layer to use tableId instead of tableName Per SPI changes - table names can change over time, so we use stable table entity IDs instead. Also removed catalogName as the catalogId is sufficient for identification. 
--- .../jdbc/JdbcBasePersistenceImpl.java | 16 +-- .../jdbc/JdbcMetricsPersistence.java | 4 +- .../relational/jdbc/SpiModelConverter.java | 8 +- .../jdbc/models/MetricsReportConverter.java | 8 +- .../jdbc/models/ModelCommitMetricsReport.java | 12 +- .../ModelCommitMetricsReportConverter.java | 2 +- .../jdbc/models/ModelScanMetricsReport.java | 12 +- .../ModelScanMetricsReportConverter.java | 2 +- .../jdbc/MetricsReportPersistenceTest.java | 124 +++++++++--------- .../jdbc/SpiModelConverterTest.java | 76 +++++------ .../models/ModelCommitMetricsReportTest.java | 18 +-- .../models/ModelScanMetricsReportTest.java | 21 +-- 12 files changed, 142 insertions(+), 161 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 076b9f7f1c..70a4978997 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -539,7 +539,7 @@ private List loadCommitMetricsReportRoles( * * @param catalogId the catalog entity ID * @param namespace the namespace (dot-separated) - * @param tableName the table name + * @param tableId the table entity ID * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound * @param limit maximum number of results to return @@ -550,7 +550,7 @@ private List loadCommitMetricsReportRoles( public List queryScanMetricsReports( long catalogId, @Nonnull String namespace, - @Nonnull String tableName, + long tableId, @Nullable Long startTimeMs, @Nullable Long endTimeMs, int limit) { @@ -559,8 +559,8 @@ public List queryScanMetricsReports( } try { StringBuilder 
whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_name = ?"); - List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableName)); + whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_id = ?"); + List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableId)); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); @@ -599,7 +599,7 @@ public List queryScanMetricsReports( * * @param catalogId the catalog entity ID * @param namespace the namespace (dot-separated) - * @param tableName the table name + * @param tableId the table entity ID * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound * @param limit maximum number of results to return @@ -610,7 +610,7 @@ public List queryScanMetricsReports( public List queryCommitMetricsReports( long catalogId, @Nonnull String namespace, - @Nonnull String tableName, + long tableId, @Nullable Long startTimeMs, @Nullable Long endTimeMs, int limit) { @@ -618,10 +618,10 @@ public List queryCommitMetricsReports( return Collections.emptyList(); } try { - List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableName)); + List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableId)); StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_name = ?"); + whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? 
AND table_id = ?"); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java index 70daa82cf0..8798b8dd52 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -92,7 +92,7 @@ public Page queryScanReports( jdbcPersistence.queryScanMetricsReports( criteria.catalogId().getAsLong(), String.join(".", criteria.namespace()), - criteria.tableName().orElse(""), + criteria.tableId().orElse(0L), startTimeMs, endTimeMs, limit); @@ -124,7 +124,7 @@ public Page queryCommitReports( jdbcPersistence.queryCommitMetricsReports( criteria.catalogId().getAsLong(), String.join(".", criteria.namespace()), - criteria.tableName().orElse(""), + criteria.tableId().orElse(0L), startTimeMs, endTimeMs, limit); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java index 44d9c585f0..7e0f0449bb 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java @@ -73,7 +73,7 @@ public static ModelScanMetricsReport toModelScanReport(ScanMetricsRecord record, .realmId(realmId) .catalogId(record.catalogId()) .namespace(String.join(".", record.namespace())) - .tableName(record.tableName()) + .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) 
.snapshotId(record.snapshotId().orElse(null)) .schemaId(record.schemaId().orElse(null)) @@ -114,7 +114,7 @@ public static ModelCommitMetricsReport toModelCommitReport( .realmId(realmId) .catalogId(record.catalogId()) .namespace(String.join(".", record.namespace())) - .tableName(record.tableName()) + .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId()) .sequenceNumber(record.sequenceNumber().orElse(null)) @@ -152,7 +152,7 @@ public static ScanMetricsRecord toScanMetricsRecord(ModelScanMetricsReport model .reportId(model.getReportId()) .catalogId(model.getCatalogId()) .namespace(parseNamespace(model.getNamespace())) - .tableName(model.getTableName()) + .tableId(model.getTableId()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(Optional.ofNullable(model.getSnapshotId())) .schemaId(Optional.ofNullable(model.getSchemaId())) @@ -190,7 +190,7 @@ public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport .reportId(model.getReportId()) .catalogId(model.getCatalogId()) .namespace(parseNamespace(model.getNamespace())) - .tableName(model.getTableName()) + .tableId(model.getTableId()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(model.getSnapshotId()) .sequenceNumber(Optional.ofNullable(model.getSequenceNumber())) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java index dcdf9b925a..d3fe29e4ad 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java @@ -49,6 +49,7 @@ private MetricsReportConverter() { * @param scanReport the Iceberg scan report 
* @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID + * @param tableId the table entity ID * @param namespace the namespace (dot-separated) * @param principalName the principal who initiated the scan (optional) * @param requestId the request ID (optional) @@ -60,6 +61,7 @@ public static ModelScanMetricsReport fromScanReport( ScanReport scanReport, String realmId, long catalogId, + long tableId, String namespace, @Nullable String principalName, @Nullable String requestId, @@ -77,7 +79,7 @@ public static ModelScanMetricsReport fromScanReport( .realmId(realmId) .catalogId(catalogId) .namespace(namespace) - .tableName(scanReport.tableName()) + .tableId(tableId) .timestampMs(timestampMs) .principalName(principalName) .requestId(requestId) @@ -143,6 +145,7 @@ public static ModelScanMetricsReport fromScanReport( * @param commitReport the Iceberg commit report * @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID + * @param tableId the table entity ID * @param namespace the namespace (dot-separated) * @param principalName the principal who initiated the commit (optional) * @param requestId the request ID (optional) @@ -154,6 +157,7 @@ public static ModelCommitMetricsReport fromCommitReport( CommitReport commitReport, String realmId, long catalogId, + long tableId, String namespace, @Nullable String principalName, @Nullable String requestId, @@ -171,7 +175,7 @@ public static ModelCommitMetricsReport fromCommitReport( .realmId(realmId) .catalogId(catalogId) .namespace(namespace) - .tableName(commitReport.tableName()) + .tableId(tableId) .timestampMs(timestampMs) .principalName(principalName) .requestId(requestId) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index ca23333c1d..ce17831c82 100644 --- 
a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -39,7 +39,7 @@ public interface ModelCommitMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REALM_ID, getRealmId()); map.put(CATALOG_ID, getCatalogId()); map.put(NAMESPACE, getNamespace()); - map.put(TABLE_NAME_COL, getTableName()); + map.put(TABLE_ID_COL, getTableId()); map.put(TIMESTAMP_MS, getTimestampMs()); map.put(PRINCIPAL_NAME, getPrincipalName()); map.put(REQUEST_ID, getRequestId()); @@ -277,7 +277,7 @@ default Map toMap(DatabaseType databaseType) { .realmId("") .catalogId(0L) .namespace("") - .tableName("") + .tableId(0L) .timestampMs(0L) .snapshotId(0L) .operation("") diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index 9691e2f43f..b683edd0e3 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -36,7 +36,7 @@ public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) .catalogId(rs.getLong(ModelCommitMetricsReport.CATALOG_ID)) .namespace(rs.getString(ModelCommitMetricsReport.NAMESPACE)) - .tableName(rs.getString(ModelCommitMetricsReport.TABLE_NAME_COL)) + .tableId(rs.getLong(ModelCommitMetricsReport.TABLE_ID_COL)) .timestampMs(rs.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) 
.principalName(rs.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) .requestId(rs.getString(ModelCommitMetricsReport.REQUEST_ID)) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index 90ad5841dd..012407ca86 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -39,7 +39,7 @@ public interface ModelScanMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REALM_ID, getRealmId()); map.put(CATALOG_ID, getCatalogId()); map.put(NAMESPACE, getNamespace()); - map.put(TABLE_NAME_COL, getTableName()); + map.put(TABLE_ID_COL, getTableId()); map.put(TIMESTAMP_MS, getTimestampMs()); map.put(PRINCIPAL_NAME, getPrincipalName()); map.put(REQUEST_ID, getRequestId()); @@ -281,7 +281,7 @@ default Map toMap(DatabaseType databaseType) { .realmId("") .catalogId(0L) .namespace("") - .tableName("") + .tableId(0L) .timestampMs(0L) .resultDataFiles(0L) .resultDeleteFiles(0L) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index 5449ae4be0..1abbc0389c 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -36,7 +36,7 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws 
SQLException { .realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) .catalogId(rs.getLong(ModelScanMetricsReport.CATALOG_ID)) .namespace(rs.getString(ModelScanMetricsReport.NAMESPACE)) - .tableName(rs.getString(ModelScanMetricsReport.TABLE_NAME_COL)) + .tableId(rs.getLong(ModelScanMetricsReport.TABLE_ID_COL)) .timestampMs(rs.getLong(ModelScanMetricsReport.TIMESTAMP_MS)) .principalName(rs.getString(ModelScanMetricsReport.PRINCIPAL_NAME)) .requestId(rs.getString(ModelScanMetricsReport.REQUEST_ID)) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 74faabe2f6..9fc4463c23 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -81,10 +81,9 @@ void testWriteScanMetricsReport() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .schemaId(1) @@ -122,10 +121,9 @@ void testWriteCommitMetricsReport() { ImmutableModelCommitMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .sequenceNumber(1L) @@ -166,10 +164,10 @@ void testWriteMultipleScanReports() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) 
.realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("table_" + i) + .tableId(100L + i) .timestampMs(System.currentTimeMillis()) .resultDataFiles((long) (i * 10)) .resultDeleteFiles(0L) @@ -199,10 +197,10 @@ void testWriteReportWithNullOptionalFields() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db") - .tableName("minimal_table") + .tableId(99999L) .timestampMs(System.currentTimeMillis()) // All optional fields left as null .resultDataFiles(1L) @@ -237,10 +235,10 @@ void testQueryScanMetricsReportsByTable() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("query_test_table") + .tableId(88888L) .timestampMs(baseTime + i * 1000) .resultDataFiles((long) i) .resultDeleteFiles(0L) @@ -265,19 +263,19 @@ void testQueryScanMetricsReportsByTable() { // Query all reports for the table var results = persistence.queryScanMetricsReports( - "test-catalog", "db.schema", "query_test_table", null, null, 10); + 12345L, "db.schema", 88888L, null, null, 10); assertThat(results).hasSize(5); // Query with time range var rangeResults = persistence.queryScanMetricsReports( - "test-catalog", "db.schema", "query_test_table", baseTime + 1000, baseTime + 4000, 10); + 12345L, "db.schema", 88888L, baseTime + 1000, baseTime + 4000, 10); assertThat(rangeResults).hasSize(3); // Query with limit var limitedResults = persistence.queryScanMetricsReports( - "test-catalog", "db.schema", "query_test_table", null, null, 2); + 12345L, "db.schema", 88888L, null, null, 2); assertThat(limitedResults).hasSize(2); } @@ -290,10 +288,10 @@ void testQueryScanMetricsReportsByTraceId() { 
ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db") - .tableName("trace_test_table") + .tableId(77777L) .timestampMs(System.currentTimeMillis()) .otelTraceId(traceId) .resultDataFiles(1L) @@ -333,10 +331,10 @@ void testDeleteOldScanMetricsReports() { ImmutableModelScanMetricsReport.builder() .reportId("old-report-" + UUID.randomUUID()) .realmId("TEST_REALM") - .catalogId("catalog1") - .catalogName("test_catalog") + .catalogId(11111L) + .namespace("test_namespace") - .tableName("test_table") + .tableId(67890L) .timestampMs(twoDaysAgo) .resultDataFiles(10L) .resultDeleteFiles(0L) @@ -362,10 +360,10 @@ void testDeleteOldScanMetricsReports() { ImmutableModelScanMetricsReport.builder() .reportId("recent-report-" + UUID.randomUUID()) .realmId("TEST_REALM") - .catalogId("catalog1") - .catalogName("test_catalog") + .catalogId(11111L) + .namespace("test_namespace") - .tableName("test_table") + .tableId(67890L) .timestampMs(oneHourAgo) .resultDataFiles(10L) .resultDeleteFiles(0L) @@ -396,7 +394,7 @@ void testDeleteOldScanMetricsReports() { // Query to verify only recent report remains var results = persistence.queryScanMetricsReports( - "test_catalog", "test_namespace", "test_table", null, null, 10); + 11111L, "test_namespace", 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -413,10 +411,10 @@ void testDeleteOldCommitMetricsReports() { ImmutableModelCommitMetricsReport.builder() .reportId("old-commit-" + UUID.randomUUID()) .realmId("TEST_REALM") - .catalogId("catalog1") - .catalogName("test_catalog") + .catalogId(11111L) + .namespace("test_namespace") - .tableName("test_table") + .tableId(67890L) .timestampMs(twoDaysAgo) .snapshotId(100L) .sequenceNumber(1L) @@ -447,10 +445,10 @@ void testDeleteOldCommitMetricsReports() { 
ImmutableModelCommitMetricsReport.builder() .reportId("recent-commit-" + UUID.randomUUID()) .realmId("TEST_REALM") - .catalogId("catalog1") - .catalogName("test_catalog") + .catalogId(11111L) + .namespace("test_namespace") - .tableName("test_table") + .tableId(67890L) .timestampMs(oneHourAgo) .snapshotId(101L) .sequenceNumber(2L) @@ -486,7 +484,7 @@ void testDeleteOldCommitMetricsReports() { // Query to verify only recent report remains var results = persistence.queryCommitMetricsReports( - "test_catalog", "test_namespace", "test_table", null, null, 10); + 11111L, "test_namespace", 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -519,10 +517,10 @@ void testWriteScanMetricsReport_OlderSchema_IsNoOp() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .resultDataFiles(1L) .resultDeleteFiles(0L) @@ -554,10 +552,10 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { ImmutableModelCommitMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .operation("append") @@ -590,7 +588,7 @@ void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); var results = - v3Persistence.queryScanMetricsReports("catalog", "namespace", "table", null, null, 10); + v3Persistence.queryScanMetricsReports(12345L, "namespace", 67890L, null, null, 10); assertThat(results).isEmpty(); } @@ -600,7 +598,7 @@ void 
testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); var results = - v3Persistence.queryCommitMetricsReports("catalog", "namespace", "table", null, null, 10); + v3Persistence.queryCommitMetricsReports(12345L, "namespace", 67890L, null, null, 10); assertThat(results).isEmpty(); } @@ -656,10 +654,10 @@ void testWriteScanMetricsReportWithRoles() { ImmutableModelScanMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .schemaId(1) @@ -697,10 +695,10 @@ void testWriteCommitMetricsReportWithRoles() { ImmutableModelCommitMetricsReport.builder() .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .sequenceNumber(1L) @@ -745,10 +743,10 @@ void testScanMetricsReportRolesAreReadBack() { ImmutableModelScanMetricsReport.builder() .reportId(reportId) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .schemaId(1) @@ -797,10 +795,10 @@ void testCommitMetricsReportRolesAreReadBack() { ImmutableModelCommitMetricsReport.builder() .reportId(reportId) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .sequenceNumber(1L) @@ -851,10 +849,10 @@ void 
testScanMetricsReportWithEmptyRoles() { ImmutableModelScanMetricsReport.builder() .reportId(reportId) .realmId("TEST_REALM") - .catalogId("test-catalog") - .catalogName("test-catalog") + .catalogId(12345L) + .namespace("db.schema") - .tableName("test_table") + .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) .schemaId(1) @@ -902,10 +900,10 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { ImmutableModelScanMetricsReport.builder() .reportId(reportId) .realmId("TEST_REALM") - .catalogId("test-catalog-roles-query") - .catalogName("test-catalog-roles-query") + .catalogId(22222L) + .namespace("db.schema") - .tableName("test_table_roles") + .tableId(66666L) .timestampMs(timestamp) .snapshotId(12345L) .schemaId(1) @@ -938,9 +936,9 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { // Query by time range and verify roles are returned List results = persistence.queryScanMetricsReports( - "test-catalog-roles-query", + 22222L, "db.schema", - "test_table_roles", + 66666L, timestamp - 1000, timestamp + 1000, 100); diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java index f89cdf4df3..f6e611ea01 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java @@ -24,8 +24,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; import 
org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; @@ -40,9 +38,9 @@ public class SpiModelConverterTest { private static final String TEST_REPORT_ID = "report-123"; private static final String TEST_REALM_ID = "realm-1"; private static final long TEST_CATALOG_ID = 12345L; - private static final String TEST_CATALOG_NAME = "my_catalog"; - private static final String TEST_NAMESPACE = "db.schema"; - private static final String TEST_TABLE_NAME = "my_table"; + private static final List TEST_NAMESPACE = List.of("db", "schema"); + private static final String TEST_NAMESPACE_STR = "db.schema"; + private static final long TEST_TABLE_ID = 67890L; private static final Instant TEST_TIMESTAMP = Instant.ofEpochMilli(1704067200000L); private static final long TEST_TIMESTAMP_MS = 1704067200000L; @@ -56,10 +54,9 @@ void testToModelScanReport() { assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); - assertThat(model.getCatalogId()).isEqualTo(String.valueOf(TEST_CATALOG_ID)); - assertThat(model.getCatalogName()).isEqualTo(TEST_CATALOG_NAME); - assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE); - assertThat(model.getTableName()).isEqualTo(TEST_TABLE_NAME); + assertThat(model.getCatalogId()).isEqualTo(TEST_CATALOG_ID); + assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE_STR); + assertThat(model.getTableId()).isEqualTo(TEST_TABLE_ID); assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); assertThat(model.getSnapshotId()).isEqualTo(123456789L); assertThat(model.getSchemaId()).isEqualTo(1); @@ -80,9 +77,8 @@ void testToScanMetricsRecord() { assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); - assertThat(record.catalogName()).isEqualTo(TEST_CATALOG_NAME); - assertThat(record.tableIdentifier().namespace().toString()).isEqualTo(TEST_NAMESPACE); - 
assertThat(record.tableIdentifier().name()).isEqualTo(TEST_TABLE_NAME); + assertThat(record.namespace()).isEqualTo(TEST_NAMESPACE); + assertThat(record.tableId()).isEqualTo(TEST_TABLE_ID); assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); assertThat(record.snapshotId()).isEqualTo(Optional.of(123456789L)); assertThat(record.schemaId()).isEqualTo(Optional.of(1)); @@ -102,8 +98,8 @@ void testScanRecordRoundTrip() { assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); - assertThat(roundTripped.catalogName()).isEqualTo(original.catalogName()); - assertThat(roundTripped.tableIdentifier()).isEqualTo(original.tableIdentifier()); + assertThat(roundTripped.namespace()).isEqualTo(original.namespace()); + assertThat(roundTripped.tableId()).isEqualTo(original.tableId()); assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); assertThat(roundTripped.resultDataFiles()).isEqualTo(original.resultDataFiles()); } @@ -118,10 +114,9 @@ void testToModelCommitReport() { assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); - assertThat(model.getCatalogId()).isEqualTo(String.valueOf(TEST_CATALOG_ID)); - assertThat(model.getCatalogName()).isEqualTo(TEST_CATALOG_NAME); - assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE); - assertThat(model.getTableName()).isEqualTo(TEST_TABLE_NAME); + assertThat(model.getCatalogId()).isEqualTo(TEST_CATALOG_ID); + assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE_STR); + assertThat(model.getTableId()).isEqualTo(TEST_TABLE_ID); assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); assertThat(model.getSnapshotId()).isEqualTo(987654321L); assertThat(model.getSequenceNumber()).isEqualTo(5L); @@ -140,9 +135,8 @@ void testToCommitMetricsRecord() { assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); - 
assertThat(record.catalogName()).isEqualTo(TEST_CATALOG_NAME); - assertThat(record.tableIdentifier().namespace().toString()).isEqualTo(TEST_NAMESPACE); - assertThat(record.tableIdentifier().name()).isEqualTo(TEST_TABLE_NAME); + assertThat(record.namespace()).isEqualTo(TEST_NAMESPACE); + assertThat(record.tableId()).isEqualTo(TEST_TABLE_ID); assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); assertThat(record.snapshotId()).isEqualTo(987654321L); assertThat(record.sequenceNumber()).isEqualTo(Optional.of(5L)); @@ -160,8 +154,8 @@ void testCommitRecordRoundTrip() { assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); - assertThat(roundTripped.catalogName()).isEqualTo(original.catalogName()); - assertThat(roundTripped.tableIdentifier()).isEqualTo(original.tableIdentifier()); + assertThat(roundTripped.namespace()).isEqualTo(original.namespace()); + assertThat(roundTripped.tableId()).isEqualTo(original.tableId()); assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); assertThat(roundTripped.snapshotId()).isEqualTo(original.snapshotId()); assertThat(roundTripped.operation()).isEqualTo(original.operation()); @@ -175,8 +169,8 @@ void testEmptyNamespace() { ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) - .tableIdentifier(TableIdentifier.of(Namespace.empty(), TEST_TABLE_NAME)) + .namespace(List.of()) + .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .resultDataFiles(0L) .resultDeleteFiles(0L) @@ -200,7 +194,7 @@ void testEmptyNamespace() { assertThat(model.getNamespace()).isEmpty(); ScanMetricsRecord roundTripped = SpiModelConverter.toScanMetricsRecord(model); - assertThat(roundTripped.tableIdentifier().namespace()).isEqualTo(Namespace.empty()); + assertThat(roundTripped.namespace()).isEmpty(); } @Test @@ -209,8 +203,8 @@ void testNullOptionalFields() { ScanMetricsRecord.builder() 
.reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) - .tableIdentifier(TableIdentifier.of(Namespace.of("db"), TEST_TABLE_NAME)) + .namespace(List.of("db")) + .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .resultDataFiles(0L) .resultDeleteFiles(0L) @@ -244,8 +238,8 @@ void testEmptyMetadata() { ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) - .tableIdentifier(TableIdentifier.of(Namespace.of("db"), TEST_TABLE_NAME)) + .namespace(List.of("db")) + .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .resultDataFiles(0L) .resultDeleteFiles(0L) @@ -275,8 +269,8 @@ private ScanMetricsRecord createTestScanRecord() { return ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) - .tableIdentifier(TableIdentifier.of(Namespace.of("db", "schema"), TEST_TABLE_NAME)) + .namespace(TEST_NAMESPACE) + .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .snapshotId(123456789L) .schemaId(1) @@ -307,10 +301,9 @@ private ModelScanMetricsReport createTestModelScanReport() { return ImmutableModelScanMetricsReport.builder() .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) - .catalogId(String.valueOf(TEST_CATALOG_ID)) - .catalogName(TEST_CATALOG_NAME) - .namespace(TEST_NAMESPACE) - .tableName(TEST_TABLE_NAME) + .catalogId(TEST_CATALOG_ID) + .namespace(TEST_NAMESPACE_STR) + .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .snapshotId(123456789L) .schemaId(1) @@ -341,8 +334,8 @@ private CommitMetricsRecord createTestCommitRecord() { return CommitMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) - .tableIdentifier(TableIdentifier.of(Namespace.of("db", "schema"), TEST_TABLE_NAME)) + .namespace(TEST_NAMESPACE) + .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .snapshotId(987654321L) .sequenceNumber(5L) @@ -373,10 +366,9 @@ private ModelCommitMetricsReport 
createTestModelCommitReport() { return ImmutableModelCommitMetricsReport.builder() .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) - .catalogId(String.valueOf(TEST_CATALOG_ID)) - .catalogName(TEST_CATALOG_NAME) - .namespace(TEST_NAMESPACE) - .tableName(TEST_TABLE_NAME) + .catalogId(TEST_CATALOG_ID) + .namespace(TEST_NAMESPACE_STR) + .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .snapshotId(987654321L) .sequenceNumber(5L) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java index 8bb295c4d1..e27bce6660 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -33,10 +33,9 @@ public class ModelCommitMetricsReportTest { private static final String TEST_REPORT_ID = "commit-report-123"; private static final String TEST_REALM_ID = "realm-1"; - private static final String TEST_CATALOG_ID = "catalog-1"; - private static final String TEST_CATALOG_NAME = "my_catalog"; + private static final long TEST_CATALOG_ID = 12345L; private static final String TEST_NAMESPACE = "db.schema"; - private static final String TEST_TABLE_NAME = "my_table"; + private static final long TEST_TABLE_ID = 67890L; private static final long TEST_TIMESTAMP_MS = 1704067200000L; private static final String TEST_PRINCIPAL = "user@example.com"; private static final String TEST_REQUEST_ID = "req-456"; @@ -71,12 +70,9 @@ public void testFromResultSet() throws SQLException { ResultSet mockResultSet = mock(ResultSet.class); when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); 
when(mockResultSet.getString(ModelCommitMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.CATALOG_NAME)) - .thenReturn(TEST_CATALOG_NAME); + when(mockResultSet.getLong(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); - when(mockResultSet.getString(ModelCommitMetricsReport.TABLE_NAME_COL)) - .thenReturn(TEST_TABLE_NAME); + when(mockResultSet.getLong(ModelCommitMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) .thenReturn(TEST_TIMESTAMP_MS); when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) @@ -135,9 +131,8 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertEquals(TEST_CATALOG_NAME, result.getCatalogName()); assertEquals(TEST_NAMESPACE, result.getNamespace()); - assertEquals(TEST_TABLE_NAME, result.getTableName()); + assertEquals(TEST_TABLE_ID, result.getTableId()); assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); assertEquals(TEST_SNAPSHOT_ID, result.getSnapshotId()); assertEquals(TEST_OPERATION, result.getOperation()); @@ -179,9 +174,8 @@ private ModelCommitMetricsReport createTestReport() { .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) .namespace(TEST_NAMESPACE) - .tableName(TEST_TABLE_NAME) + .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .principalName(TEST_PRINCIPAL) .requestId(TEST_REQUEST_ID) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java 
b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java index 0c8f26a2ed..bc0200886c 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -33,10 +33,9 @@ public class ModelScanMetricsReportTest { private static final String TEST_REPORT_ID = "report-123"; private static final String TEST_REALM_ID = "realm-1"; - private static final String TEST_CATALOG_ID = "catalog-1"; - private static final String TEST_CATALOG_NAME = "my_catalog"; + private static final long TEST_CATALOG_ID = 12345L; private static final String TEST_NAMESPACE = "db.schema"; - private static final String TEST_TABLE_NAME = "my_table"; + private static final long TEST_TABLE_ID = 67890L; private static final long TEST_TIMESTAMP_MS = 1704067200000L; private static final String TEST_PRINCIPAL = "user@example.com"; private static final String TEST_REQUEST_ID = "req-456"; @@ -71,12 +70,9 @@ public void testFromResultSet() throws SQLException { ResultSet mockResultSet = mock(ResultSet.class); when(mockResultSet.getString(ModelScanMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); when(mockResultSet.getString(ModelScanMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); - when(mockResultSet.getString(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelScanMetricsReport.CATALOG_NAME)) - .thenReturn(TEST_CATALOG_NAME); + when(mockResultSet.getLong(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); when(mockResultSet.getString(ModelScanMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); - when(mockResultSet.getString(ModelScanMetricsReport.TABLE_NAME_COL)) - .thenReturn(TEST_TABLE_NAME); + 
when(mockResultSet.getLong(ModelScanMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); when(mockResultSet.getString(ModelScanMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); @@ -134,9 +130,8 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertEquals(TEST_CATALOG_NAME, result.getCatalogName()); assertEquals(TEST_NAMESPACE, result.getNamespace()); - assertEquals(TEST_TABLE_NAME, result.getTableName()); + assertEquals(TEST_TABLE_ID, result.getTableId()); assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); assertEquals(TEST_PRINCIPAL, result.getPrincipalName()); assertEquals(TEST_REQUEST_ID, result.getRequestId()); @@ -157,9 +152,8 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); assertEquals(TEST_REALM_ID, resultMap.get(ModelScanMetricsReport.REALM_ID)); assertEquals(TEST_CATALOG_ID, resultMap.get(ModelScanMetricsReport.CATALOG_ID)); - assertEquals(TEST_CATALOG_NAME, resultMap.get(ModelScanMetricsReport.CATALOG_NAME)); assertEquals(TEST_NAMESPACE, resultMap.get(ModelScanMetricsReport.NAMESPACE)); - assertEquals(TEST_TABLE_NAME, resultMap.get(ModelScanMetricsReport.TABLE_NAME_COL)); + assertEquals(TEST_TABLE_ID, resultMap.get(ModelScanMetricsReport.TABLE_ID_COL)); assertEquals(TEST_TIMESTAMP_MS, resultMap.get(ModelScanMetricsReport.TIMESTAMP_MS)); assertEquals(TEST_RESULT_DATA_FILES, resultMap.get(ModelScanMetricsReport.RESULT_DATA_FILES)); assertEquals(TEST_METADATA, resultMap.get(ModelScanMetricsReport.METADATA)); @@ -182,9 +176,8 @@ private ModelScanMetricsReport createTestReport() { .reportId(TEST_REPORT_ID) 
.realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .catalogName(TEST_CATALOG_NAME) .namespace(TEST_NAMESPACE) - .tableName(TEST_TABLE_NAME) + .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .principalName(TEST_PRINCIPAL) .requestId(TEST_REQUEST_ID) From 113aae220c6fd6eb546143d88ea8486211db8b42 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:57:19 -0800 Subject: [PATCH 46/67] fix: Apply tableId schema changes and update tests - Use table_id (BIGINT) instead of table_name (TEXT) in metrics tables - Use catalog_id (BIGINT) instead of catalog_name (TEXT) - Updated tests to use long IDs for catalogId and tableId --- .../src/main/resources/h2/schema-v4.sql | 58 ++++++++++++++----- .../src/main/resources/postgres/schema-v4.sql | 39 ++++++++----- .../jdbc/MetricsReportPersistenceTest.java | 36 ++---------- 3 files changed, 75 insertions(+), 58 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index 4fab1c5072..fd2bc29b50 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -17,14 +17,14 @@ -- under the License. 
-- --- Changes from v2: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency -- Changes from v3: --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles +-- ============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET SCHEMA POLARIS_SCHEMA; @@ -127,6 +127,10 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); +-- ============================================================================ +-- EVENTS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -164,6 +168,36 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( PRIMARY KEY (realm_id, idempotency_key) ); +CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires + ON idempotency_records (realm_id, expires_at); + +-- ============================================================================ +-- IDEMPOTENCY RECORDS TABLE (NEW in v4) +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS idempotency_records ( 
+ realm_id TEXT NOT NULL, + idempotency_key TEXT NOT NULL, + operation_type TEXT NOT NULL, + resource_id TEXT NOT NULL, -- normalized request-derived resource identifier (not a generated entity id) + + -- Finalization/replay + http_status INTEGER, -- NULL while IN_PROGRESS; set only on finalized 2xx/terminal 4xx + error_subtype TEXT, -- optional: e.g., already_exists, namespace_not_empty, idempotency_replay_failed + response_summary TEXT, -- minimal body to reproduce equivalent response (JSON string) + response_headers TEXT, -- small whitelisted headers to replay (JSON string) + finalized_at TIMESTAMP, -- when http_status was written + + -- Liveness/ops + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL, + heartbeat_at TIMESTAMP, -- updated by owner while IN_PROGRESS + executor_id TEXT, -- owner pod/worker id + expires_at TIMESTAMP, + + PRIMARY KEY (realm_id, idempotency_key) +); + CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires ON idempotency_records (realm_id, expires_at); @@ -175,10 +209,9 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -246,10 +279,9 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index b85496b5fc..530fc969d9 
100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -16,14 +16,20 @@ -- specific language governing permissions and limitations -- under the License. --- Changes from v2: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency +-- ============================================================================ +-- POLARIS JDBC SCHEMA VERSION 4 (PostgreSQL) +-- ============================================================================ +-- This schema is SELF-CONTAINED and can be used for fresh installs. +-- Each schema version includes ALL tables, not just incremental changes. +-- -- Changes from v3: --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency +-- * Added `scan_metrics_report` table for scan metrics as first-class entities +-- * Added `scan_metrics_report_roles` junction table for principal roles +-- * Added `commit_metrics_report` table for commit metrics as first-class entities +-- * Added `commit_metrics_report_roles` junction table for principal roles +-- ============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET search_path TO POLARIS_SCHEMA; @@ -127,6 +133,10 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); +-- ============================================================================ +-- EVENTS TABLE (NEW in v4) +-- 
============================================================================ + CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -141,7 +151,10 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); --- Idempotency records (key-only idempotency; durable replay) +-- ============================================================================ +-- IDEMPOTENCY RECORDS TABLE (NEW in v4) +-- ============================================================================ + CREATE TABLE IF NOT EXISTS idempotency_records ( realm_id TEXT NOT NULL, idempotency_key TEXT NOT NULL, @@ -177,10 +190,9 @@ CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, @@ -254,10 +266,9 @@ COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for sca CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, - catalog_id TEXT NOT NULL, - catalog_name TEXT NOT NULL, + catalog_id BIGINT NOT NULL, namespace TEXT NOT NULL, - table_name TEXT NOT NULL, + table_id BIGINT NOT NULL, -- Report metadata timestamp_ms BIGINT NOT NULL, diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 9fc4463c23..4593496779 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -165,7 
+165,6 @@ void testWriteMultipleScanReports() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(100L + i) .timestampMs(System.currentTimeMillis()) @@ -198,7 +197,6 @@ void testWriteReportWithNullOptionalFields() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(99999L) .timestampMs(System.currentTimeMillis()) @@ -236,7 +234,6 @@ void testQueryScanMetricsReportsByTable() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(88888L) .timestampMs(baseTime + i * 1000) @@ -261,9 +258,7 @@ void testQueryScanMetricsReportsByTable() { } // Query all reports for the table - var results = - persistence.queryScanMetricsReports( - 12345L, "db.schema", 88888L, null, null, 10); + var results = persistence.queryScanMetricsReports(12345L, "db.schema", 88888L, null, null, 10); assertThat(results).hasSize(5); // Query with time range @@ -274,8 +269,7 @@ void testQueryScanMetricsReportsByTable() { // Query with limit var limitedResults = - persistence.queryScanMetricsReports( - 12345L, "db.schema", 88888L, null, null, 2); + persistence.queryScanMetricsReports(12345L, "db.schema", 88888L, null, null, 2); assertThat(limitedResults).hasSize(2); } @@ -289,7 +283,6 @@ void testQueryScanMetricsReportsByTraceId() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(77777L) .timestampMs(System.currentTimeMillis()) @@ -332,7 +325,6 @@ void testDeleteOldScanMetricsReports() { .reportId("old-report-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(twoDaysAgo) @@ -361,7 +353,6 @@ void testDeleteOldScanMetricsReports() { .reportId("recent-report-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) 
.timestampMs(oneHourAgo) @@ -393,8 +384,7 @@ void testDeleteOldScanMetricsReports() { // Query to verify only recent report remains var results = - persistence.queryScanMetricsReports( - 11111L, "test_namespace", 67890L, null, null, 10); + persistence.queryScanMetricsReports(11111L, "test_namespace", 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -412,7 +402,6 @@ void testDeleteOldCommitMetricsReports() { .reportId("old-commit-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(twoDaysAgo) @@ -446,7 +435,6 @@ void testDeleteOldCommitMetricsReports() { .reportId("recent-commit-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(oneHourAgo) @@ -483,8 +471,7 @@ void testDeleteOldCommitMetricsReports() { // Query to verify only recent report remains var results = - persistence.queryCommitMetricsReports( - 11111L, "test_namespace", 67890L, null, null, 10); + persistence.queryCommitMetricsReports(11111L, "test_namespace", 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -518,7 +505,6 @@ void testWriteScanMetricsReport_OlderSchema_IsNoOp() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -553,7 +539,6 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -655,7 +640,6 @@ void testWriteScanMetricsReportWithRoles() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) 
.timestampMs(System.currentTimeMillis()) @@ -696,7 +680,6 @@ void testWriteCommitMetricsReportWithRoles() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -744,7 +727,6 @@ void testScanMetricsReportRolesAreReadBack() { .reportId(reportId) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -796,7 +778,6 @@ void testCommitMetricsReportRolesAreReadBack() { .reportId(reportId) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -850,7 +831,6 @@ void testScanMetricsReportWithEmptyRoles() { .reportId(reportId) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) @@ -901,7 +881,6 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { .reportId(reportId) .realmId("TEST_REALM") .catalogId(22222L) - .namespace("db.schema") .tableId(66666L) .timestampMs(timestamp) @@ -936,12 +915,7 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { // Query by time range and verify roles are returned List results = persistence.queryScanMetricsReports( - 22222L, - "db.schema", - 66666L, - timestamp - 1000, - timestamp + 1000, - 100); + 22222L, "db.schema", 66666L, timestamp - 1000, timestamp + 1000, 100); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(reportId); From 516af0a1c5891d77c9b8dd2673c62f04cfbff0e0 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 18:09:19 -0800 Subject: [PATCH 47/67] refactor: Remove namespace from JDBC query methods Per reviewer feedback - queries should be by tableId only, not by namespace. The namespace is implicit in the tableId. 
If users want to query by namespace, the service layer should resolve namespace to table IDs using the current catalog state, then query by those IDs. Changes: - Remove namespace parameter from queryScanMetricsReports() - Remove namespace parameter from queryCommitMetricsReports() - Update SQL WHERE clauses to not filter by namespace - Make tableId required for queries (was optional with default 0) - Update all tests --- .../jdbc/JdbcBasePersistenceImpl.java | 12 ++++------ .../jdbc/JdbcMetricsPersistence.java | 14 +++++------- .../jdbc/MetricsReportPersistenceTest.java | 22 +++++++------------ 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index 70a4978997..d3e8f1854e 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -538,7 +538,6 @@ private List loadCommitMetricsReportRoles( *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. * * @param catalogId the catalog entity ID - * @param namespace the namespace (dot-separated) * @param tableId the table entity ID * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound @@ -549,7 +548,6 @@ private List loadCommitMetricsReportRoles( @Nonnull public List queryScanMetricsReports( long catalogId, - @Nonnull String namespace, long tableId, @Nullable Long startTimeMs, @Nullable Long endTimeMs, @@ -559,8 +557,8 @@ public List queryScanMetricsReports( } try { StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_id = ?"); - List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableId)); + whereClause.append("realm_id = ? AND catalog_id = ? AND table_id = ?"); + List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); @@ -598,7 +596,6 @@ public List queryScanMetricsReports( *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. * * @param catalogId the catalog entity ID - * @param namespace the namespace (dot-separated) * @param tableId the table entity ID * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound @@ -609,7 +606,6 @@ public List queryScanMetricsReports( @Nonnull public List queryCommitMetricsReports( long catalogId, - @Nonnull String namespace, long tableId, @Nullable Long startTimeMs, @Nullable Long endTimeMs, @@ -618,10 +614,10 @@ public List queryCommitMetricsReports( return Collections.emptyList(); } try { - List values = new ArrayList<>(List.of(realmId, catalogId, namespace, tableId)); + List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND namespace = ? AND table_id = ?"); + whereClause.append("realm_id = ? AND catalog_id = ? 
AND table_id = ?"); if (startTimeMs != null) { whereClause.append(" AND timestamp_ms >= ?"); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java index 8798b8dd52..89e6d0df06 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -79,8 +79,8 @@ public Page queryScanReports( return Page.fromItems(List.of()); } - // catalogId is required for queries - if (criteria.catalogId().isEmpty()) { + // catalogId and tableId are required for queries + if (criteria.catalogId().isEmpty() || criteria.tableId().isEmpty()) { return Page.fromItems(List.of()); } @@ -91,8 +91,7 @@ public Page queryScanReports( List models = jdbcPersistence.queryScanMetricsReports( criteria.catalogId().getAsLong(), - String.join(".", criteria.namespace()), - criteria.tableId().orElse(0L), + criteria.tableId().getAsLong(), startTimeMs, endTimeMs, limit); @@ -111,8 +110,8 @@ public Page queryCommitReports( return Page.fromItems(List.of()); } - // catalogId is required for queries - if (criteria.catalogId().isEmpty()) { + // catalogId and tableId are required for queries + if (criteria.catalogId().isEmpty() || criteria.tableId().isEmpty()) { return Page.fromItems(List.of()); } @@ -123,8 +122,7 @@ public Page queryCommitReports( List models = jdbcPersistence.queryCommitMetricsReports( criteria.catalogId().getAsLong(), - String.join(".", criteria.namespace()), - criteria.tableId().orElse(0L), + criteria.tableId().getAsLong(), startTimeMs, endTimeMs, limit); diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java 
b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 4593496779..7b8e810c8a 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -258,18 +258,16 @@ void testQueryScanMetricsReportsByTable() { } // Query all reports for the table - var results = persistence.queryScanMetricsReports(12345L, "db.schema", 88888L, null, null, 10); + var results = persistence.queryScanMetricsReports(12345L, 88888L, null, null, 10); assertThat(results).hasSize(5); // Query with time range var rangeResults = - persistence.queryScanMetricsReports( - 12345L, "db.schema", 88888L, baseTime + 1000, baseTime + 4000, 10); + persistence.queryScanMetricsReports(12345L, 88888L, baseTime + 1000, baseTime + 4000, 10); assertThat(rangeResults).hasSize(3); // Query with limit - var limitedResults = - persistence.queryScanMetricsReports(12345L, "db.schema", 88888L, null, null, 2); + var limitedResults = persistence.queryScanMetricsReports(12345L, 88888L, null, null, 2); assertThat(limitedResults).hasSize(2); } @@ -383,8 +381,7 @@ void testDeleteOldScanMetricsReports() { assertThat(deleted).isEqualTo(1); // Query to verify only recent report remains - var results = - persistence.queryScanMetricsReports(11111L, "test_namespace", 67890L, null, null, 10); + var results = persistence.queryScanMetricsReports(11111L, 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -470,8 +467,7 @@ void testDeleteOldCommitMetricsReports() { assertThat(deleted).isEqualTo(1); // Query to verify only recent report remains - var results = - persistence.queryCommitMetricsReports(11111L, "test_namespace", 67890L, null, null, 10); + var results = 
persistence.queryCommitMetricsReports(11111L, 67890L, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -572,8 +568,7 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); - var results = - v3Persistence.queryScanMetricsReports(12345L, "namespace", 67890L, null, null, 10); + var results = v3Persistence.queryScanMetricsReports(12345L, 67890L, null, null, 10); assertThat(results).isEmpty(); } @@ -582,8 +577,7 @@ void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { void testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); - var results = - v3Persistence.queryCommitMetricsReports(12345L, "namespace", 67890L, null, null, 10); + var results = v3Persistence.queryCommitMetricsReports(12345L, 67890L, null, null, 10); assertThat(results).isEmpty(); } @@ -915,7 +909,7 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { // Query by time range and verify roles are returned List results = persistence.queryScanMetricsReports( - 22222L, "db.schema", 66666L, timestamp - 1000, timestamp + 1000, 100); + 22222L, 66666L, timestamp - 1000, timestamp + 1000, 100); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(reportId); From f7e5e45f762195fc8edd75f6a65dbc4b03b493d3 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 20:15:00 -0800 Subject: [PATCH 48/67] fix: Update PersistingMetricsReporter to use tableId and namespace API Updated the reporter to: - Look up the table entity to get the table ID - Use tableId(long) and namespace(List) instead of the removed tableIdentifier(TableIdentifier) method - Use correct types: PolarisBaseEntity for entity results and PolarisEntityCore for catalog path --- 
.../reporting/PersistingMetricsReporter.java | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java index 5a4cec6de5..cda2ca2fc7 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -22,12 +22,17 @@ import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; import java.time.Instant; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.metrics.MetricsReport; import org.apache.iceberg.metrics.ScanReport; import org.apache.polaris.core.context.CallContext; import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.entity.PolarisBaseEntity; +import org.apache.polaris.core.entity.PolarisEntity; +import org.apache.polaris.core.entity.PolarisEntityCore; import org.apache.polaris.core.entity.PolarisEntitySubType; import org.apache.polaris.core.entity.PolarisEntityType; import org.apache.polaris.core.metrics.iceberg.MetricsRecordConverter; @@ -103,13 +108,37 @@ public void reportMetric( return; } - long catalogId = catalogResult.getEntity().getId(); + PolarisBaseEntity catalogEntity = catalogResult.getEntity(); + long catalogId = catalogEntity.getId(); + + // Look up the table entity to get the table ID + // Build the path from catalog to table through namespace + List catalogPath = List.of(PolarisEntity.toCore(catalogEntity)); + EntityResult tableResult = + metaStoreManager.readEntityByName( + callContext.getPolarisCallContext(), + catalogPath, + PolarisEntityType.TABLE_LIKE, + PolarisEntitySubType.ANY_SUBTYPE, + 
table.name()); + + if (!tableResult.isSuccess()) { + LOGGER.warn( + "Failed to find table '{}' in catalog '{}' for metrics persistence. Metrics will not be stored.", + table, + catalogName); + return; + } + + long tableId = tableResult.getEntity().getId(); + List namespace = Arrays.asList(table.namespace().levels()); if (metricsReport instanceof ScanReport scanReport) { ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) .catalogId(catalogId) - .tableIdentifier(table) + .tableId(tableId) + .namespace(namespace) .build(); persistence.writeScanReport(record); LOGGER.debug( @@ -118,7 +147,8 @@ public void reportMetric( CommitMetricsRecord record = MetricsRecordConverter.forCommitReport(commitReport) .catalogId(catalogId) - .tableIdentifier(table) + .tableId(tableId) + .namespace(namespace) .build(); persistence.writeCommitReport(record); LOGGER.debug( From d6b47342ee17f727b9eb9f2b7b55d235821deef7 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 21:11:16 -0800 Subject: [PATCH 49/67] fix: Update PersistingMetricsReporterTest for new tableId/namespace API Updated tests to: - Mock table entity lookup (in addition to catalog lookup) - Assert on tableId and namespace instead of removed catalogName and tableIdentifier - Add createTableEntity helper method --- .../PersistingMetricsReporterTest.java | 57 +++++++++++++++++-- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java index c5c4a2049a..6a3feace2e 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java @@ -27,6 +27,8 @@ import static org.mockito.Mockito.when; import java.time.Instant; +import 
java.util.Arrays; +import java.util.List; import java.util.Map; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -61,8 +63,11 @@ public class PersistingMetricsReporterTest { private static final String CATALOG_NAME = "test-catalog"; private static final long CATALOG_ID = 12345L; + private static final long TABLE_ID = 67890L; + private static final String TABLE_NAME = "test_table"; + private static final List NAMESPACE = Arrays.asList("db", "schema"); private static final TableIdentifier TABLE_IDENTIFIER = - TableIdentifier.of(Namespace.of("db", "schema"), "test_table"); + TableIdentifier.of(Namespace.of("db", "schema"), TABLE_NAME); private RealmContext realmContext; private CallContext callContext; @@ -103,6 +108,16 @@ void testReportScanMetrics() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup table lookup + PolarisBaseEntity tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.TABLE_LIKE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(TABLE_NAME))) + .thenReturn(new EntityResult(tableEntity)); + // Create a scan report ScanReport scanReport = createScanReport(); @@ -115,8 +130,8 @@ void testReportScanMetrics() { ScanMetricsRecord record = captor.getValue(); assertThat(record.catalogId()).isEqualTo(CATALOG_ID); - assertThat(record.catalogName()).isEqualTo(CATALOG_NAME); - assertThat(record.tableIdentifier()).isEqualTo(TABLE_IDENTIFIER); + assertThat(record.tableId()).isEqualTo(TABLE_ID); + assertThat(record.namespace()).isEqualTo(NAMESPACE); assertThat(record.reportId()).isNotNull(); } @@ -132,6 +147,16 @@ void testReportCommitMetrics() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup table lookup + PolarisBaseEntity tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); + when(metaStoreManager.readEntityByName( + 
eq(polarisCallContext), + any(), + eq(PolarisEntityType.TABLE_LIKE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(TABLE_NAME))) + .thenReturn(new EntityResult(tableEntity)); + // Create a commit report CommitReport commitReport = createCommitReport(); @@ -144,8 +169,8 @@ void testReportCommitMetrics() { CommitMetricsRecord record = captor.getValue(); assertThat(record.catalogId()).isEqualTo(CATALOG_ID); - assertThat(record.catalogName()).isEqualTo(CATALOG_NAME); - assertThat(record.tableIdentifier()).isEqualTo(TABLE_IDENTIFIER); + assertThat(record.tableId()).isEqualTo(TABLE_ID); + assertThat(record.namespace()).isEqualTo(NAMESPACE); assertThat(record.reportId()).isNotNull(); } @@ -183,6 +208,16 @@ void testUnknownReportType() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup table lookup + PolarisBaseEntity tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.TABLE_LIKE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq(TABLE_NAME))) + .thenReturn(new EntityResult(tableEntity)); + // Create an unknown report type (using a mock) MetricsReport unknownReport = mock(MetricsReport.class); @@ -206,6 +241,18 @@ private PolarisBaseEntity createCatalogEntity(long id, String name) { .build(); } + private PolarisBaseEntity createTableEntity(long id, String name, long catalogId) { + return new PolarisBaseEntity.Builder() + .catalogId(catalogId) + .id(id) + .parentId(catalogId) // Parent is the catalog for simplicity + .typeCode(PolarisEntityType.TABLE_LIKE.getCode()) + .subTypeCode(PolarisEntitySubType.ICEBERG_TABLE.getCode()) + .name(name) + .entityVersion(1) + .build(); + } + private ScanReport createScanReport() { return ImmutableScanReport.builder() .tableName("db.schema.test_table") From ba3668bba308b6b6acda3f16997dddf159feaaac Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 08:02:41 -0800 Subject: [PATCH 
50/67] fix: address code review correctness issues - Fix namespace serialization: use JSON array instead of dot-join to preserve segment boundaries when namespace levels contain dots - Fix pagination: implement cursor-based pagination with ReportIdToken, pass lastReportId to queries, return continuation tokens - Fix table lookup: resolve full namespace path before table lookup to correctly handle tables in nested namespaces - Fix timestamp semantics: use receivedTimestamp from reporter instead of Instant.now() at conversion time Updated tests to match new behavior. --- .../jdbc/JdbcBasePersistenceImpl.java | 31 ++++++++- .../jdbc/JdbcMetricsPersistence.java | 24 ++++++- .../relational/jdbc/SpiModelConverter.java | 25 +++++-- .../jdbc/MetricsReportPersistenceTest.java | 17 ++--- .../jdbc/SpiModelConverterTest.java | 3 +- .../iceberg/MetricsRecordConverter.java | 34 ++++++++- .../reporting/PersistingMetricsReporter.java | 38 ++++++++-- .../PersistingMetricsReporterTest.java | 69 +++++++++++++++++++ 8 files changed, 217 insertions(+), 24 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index d3e8f1854e..a812412b5f 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -545,12 +545,28 @@ private List loadCommitMetricsReportRoles( * @return list of scan metrics reports matching the criteria, or empty list if schema version * < 4 */ + /** + * Retrieves scan metrics reports for a specific table within a time range. + * + *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. + * + * @param catalogId the catalog entity ID + * @param tableId the table entity ID + * @param startTimeMs start of time range (inclusive), or null for no lower bound + * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param lastReportId cursor for pagination: return results after this report ID, or null for + * first page + * @param limit maximum number of results to return + * @return list of scan metrics reports matching the criteria, or empty list if schema version + * < 4 + */ @Nonnull public List queryScanMetricsReports( long catalogId, long tableId, @Nullable Long startTimeMs, @Nullable Long endTimeMs, + @Nullable String lastReportId, int limit) { if (!supportsMetricsPersistence()) { return Collections.emptyList(); @@ -568,13 +584,17 @@ public List queryScanMetricsReports( whereClause.append(" AND timestamp_ms < ?"); values.add(endTimeMs); } + if (lastReportId != null) { + whereClause.append(" AND report_id > ?"); + values.add(lastReportId); + } String sql = "SELECT * FROM " + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + " WHERE " + whereClause - + " ORDER BY timestamp_ms DESC LIMIT " + + " ORDER BY report_id ASC LIMIT " + limit; PreparedQuery query = new PreparedQuery(sql, values); @@ -599,6 +619,8 @@ public List queryScanMetricsReports( * @param tableId the table entity ID * @param startTimeMs start of time range (inclusive), or null for no lower bound * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param lastReportId cursor for pagination: return results after this report ID, or null for + * first page * @param limit maximum number of results to return * @return list of commit metrics reports matching the criteria, or empty list if schema version * < 4 @@ -609,6 +631,7 @@ public List queryCommitMetricsReports( long tableId, @Nullable Long startTimeMs, @Nullable 
Long endTimeMs, + @Nullable String lastReportId, int limit) { if (!supportsMetricsPersistence()) { return Collections.emptyList(); @@ -627,13 +650,17 @@ public List queryCommitMetricsReports( whereClause.append(" AND timestamp_ms < ?"); values.add(endTimeMs); } + if (lastReportId != null) { + whereClause.append(" AND report_id > ?"); + values.add(lastReportId); + } String sql = "SELECT * FROM " + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + " WHERE " + whereClause - + " ORDER BY timestamp_ms DESC LIMIT " + + " ORDER BY report_id ASC LIMIT " + limit; PreparedQuery query = new PreparedQuery(sql, values); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java index 89e6d0df06..e9b2244bc8 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -24,9 +24,11 @@ import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; import org.apache.polaris.core.persistence.metrics.MetricsPersistence; import org.apache.polaris.core.persistence.metrics.MetricsQueryCriteria; +import org.apache.polaris.core.persistence.metrics.ReportIdToken; import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; import org.apache.polaris.core.persistence.pagination.Page; import org.apache.polaris.core.persistence.pagination.PageToken; +import org.apache.polaris.core.persistence.pagination.Token; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; @@ -88,18 +90,27 @@ public Page queryScanReports( Long startTimeMs = 
criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); + // Extract cursor from page token if present + String lastReportId = + pageToken.valueAs(ReportIdToken.class).map(ReportIdToken::reportId).orElse(null); + List models = jdbcPersistence.queryScanMetricsReports( criteria.catalogId().getAsLong(), criteria.tableId().getAsLong(), startTimeMs, endTimeMs, + lastReportId, limit); List records = models.stream().map(SpiModelConverter::toScanMetricsRecord).collect(Collectors.toList()); - return Page.fromItems(records); + // Build continuation token if we have results (there may be more pages) + Token nextToken = + records.isEmpty() ? null : ReportIdToken.fromReportId(records.getLast().reportId()); + + return Page.page(pageToken, records, nextToken); } @Override @@ -119,17 +130,26 @@ public Page queryCommitReports( Long startTimeMs = criteria.startTime().map(t -> t.toEpochMilli()).orElse(null); Long endTimeMs = criteria.endTime().map(t -> t.toEpochMilli()).orElse(null); + // Extract cursor from page token if present + String lastReportId = + pageToken.valueAs(ReportIdToken.class).map(ReportIdToken::reportId).orElse(null); + List models = jdbcPersistence.queryCommitMetricsReports( criteria.catalogId().getAsLong(), criteria.tableId().getAsLong(), startTimeMs, endTimeMs, + lastReportId, limit); List records = models.stream().map(SpiModelConverter::toCommitMetricsRecord).collect(Collectors.toList()); - return Page.fromItems(records); + // Build continuation token if we have results (there may be more pages) + Token nextToken = + records.isEmpty() ? 
null : ReportIdToken.fromReportId(records.getLast().reportId()); + + return Page.page(pageToken, records, nextToken); } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java index 7e0f0449bb..90982b2f83 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java @@ -72,7 +72,7 @@ public static ModelScanMetricsReport toModelScanReport(ScanMetricsRecord record, .reportId(record.reportId()) .realmId(realmId) .catalogId(record.catalogId()) - .namespace(String.join(".", record.namespace())) + .namespace(toNamespaceJson(record.namespace())) .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId().orElse(null)) @@ -113,7 +113,7 @@ public static ModelCommitMetricsReport toModelCommitReport( .reportId(record.reportId()) .realmId(realmId) .catalogId(record.catalogId()) - .namespace(String.join(".", record.namespace())) + .namespace(toNamespaceJson(record.namespace())) .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId()) @@ -226,8 +226,14 @@ private static List parseNamespace(String namespace) { if (namespace == null || namespace.isEmpty()) { return Collections.emptyList(); } - // Namespace is stored as dot-separated string - return Arrays.asList(namespace.split("\\.")); + // Namespace is stored as a JSON array to preserve segment boundaries + // (namespace levels may contain dots) + try { + return OBJECT_MAPPER.readValue(namespace, new TypeReference>() {}); + } catch (JsonProcessingException e) { + // Fallback for any legacy dot-separated data + return Arrays.asList(namespace.split("\\.")); + } } private 
static String toCommaSeparated(List list) { @@ -258,6 +264,17 @@ private static List parseStringList(String commaSeparated) { .collect(Collectors.toList()); } + private static String toNamespaceJson(List namespace) { + if (namespace == null || namespace.isEmpty()) { + return ""; + } + try { + return OBJECT_MAPPER.writeValueAsString(namespace); + } catch (JsonProcessingException e) { + return ""; + } + } + private static String toJsonString(Map map) { if (map == null || map.isEmpty()) { return "{}"; diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 7b8e810c8a..70e63a5b82 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -258,16 +258,17 @@ void testQueryScanMetricsReportsByTable() { } // Query all reports for the table - var results = persistence.queryScanMetricsReports(12345L, 88888L, null, null, 10); + var results = persistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 10); assertThat(results).hasSize(5); // Query with time range var rangeResults = - persistence.queryScanMetricsReports(12345L, 88888L, baseTime + 1000, baseTime + 4000, 10); + persistence.queryScanMetricsReports( + 12345L, 88888L, baseTime + 1000, baseTime + 4000, null, 10); assertThat(rangeResults).hasSize(3); // Query with limit - var limitedResults = persistence.queryScanMetricsReports(12345L, 88888L, null, null, 2); + var limitedResults = persistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 2); assertThat(limitedResults).hasSize(2); } @@ -381,7 +382,7 @@ void testDeleteOldScanMetricsReports() { assertThat(deleted).isEqualTo(1); // Query 
to verify only recent report remains - var results = persistence.queryScanMetricsReports(11111L, 67890L, null, null, 10); + var results = persistence.queryScanMetricsReports(11111L, 67890L, null, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -467,7 +468,7 @@ void testDeleteOldCommitMetricsReports() { assertThat(deleted).isEqualTo(1); // Query to verify only recent report remains - var results = persistence.queryCommitMetricsReports(11111L, 67890L, null, null, 10); + var results = persistence.queryCommitMetricsReports(11111L, 67890L, null, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -568,7 +569,7 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); - var results = v3Persistence.queryScanMetricsReports(12345L, 67890L, null, null, 10); + var results = v3Persistence.queryScanMetricsReports(12345L, 67890L, null, null, null, 10); assertThat(results).isEmpty(); } @@ -577,7 +578,7 @@ void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { void testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); - var results = v3Persistence.queryCommitMetricsReports(12345L, 67890L, null, null, 10); + var results = v3Persistence.queryCommitMetricsReports(12345L, 67890L, null, null, null, 10); assertThat(results).isEmpty(); } @@ -909,7 +910,7 @@ void testScanMetricsReportRolesViaTimeRangeQuery() { // Query by time range and verify roles are returned List results = persistence.queryScanMetricsReports( - 22222L, 66666L, timestamp - 1000, timestamp + 1000, 100); + 22222L, 66666L, timestamp - 1000, timestamp + 1000, null, 100); assertThat(results).hasSize(1); 
assertThat(results.get(0).getReportId()).isEqualTo(reportId); diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java index f6e611ea01..d29f8a3851 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java @@ -39,7 +39,8 @@ public class SpiModelConverterTest { private static final String TEST_REALM_ID = "realm-1"; private static final long TEST_CATALOG_ID = 12345L; private static final List TEST_NAMESPACE = List.of("db", "schema"); - private static final String TEST_NAMESPACE_STR = "db.schema"; + // Namespace is stored as JSON array + private static final String TEST_NAMESPACE_STR = "[\"db\",\"schema\"]"; private static final long TEST_TABLE_ID = 67890L; private static final Instant TEST_TIMESTAMP = Instant.ofEpochMilli(1704067200000L); private static final long TEST_TIMESTAMP_MS = 1704067200000L; diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 1a118d3e46..039011c2d0 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -81,6 +81,7 @@ public static final class ScanReportBuilder { private long catalogId; private long tableId; private List namespace = Collections.emptyList(); + private Instant timestamp; private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -115,6 +116,20 @@ public ScanReportBuilder namespace(List namespace) { return this; } + /** 
+ * Sets the timestamp for the metrics record. + * + *

      This should be the time the metrics report was received by the server, which may differ + * from the time it was recorded by the client. + * + * @param timestamp the timestamp + * @return this builder + */ + public ScanReportBuilder timestamp(Instant timestamp) { + this.timestamp = timestamp; + return this; + } + public ScanMetricsRecord build() { ScanMetricsResult metrics = scanReport.scanMetrics(); Map reportMetadata = @@ -125,7 +140,7 @@ public ScanMetricsRecord build() { .catalogId(catalogId) .namespace(namespace) .tableId(tableId) - .timestamp(Instant.now()) + .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) .filterExpression( @@ -167,6 +182,7 @@ public static final class CommitReportBuilder { private long catalogId; private long tableId; private List namespace = Collections.emptyList(); + private Instant timestamp; private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -201,6 +217,20 @@ public CommitReportBuilder namespace(List namespace) { return this; } + /** + * Sets the timestamp for the metrics record. + * + *

      This should be the time the metrics report was received by the server, which may differ + * from the time it was recorded by the client. + * + * @param timestamp the timestamp + * @return this builder + */ + public CommitReportBuilder timestamp(Instant timestamp) { + this.timestamp = timestamp; + return this; + } + public CommitMetricsRecord build() { CommitMetricsResult metrics = commitReport.commitMetrics(); Map reportMetadata = @@ -211,7 +241,7 @@ public CommitMetricsRecord build() { .catalogId(catalogId) .namespace(namespace) .tableId(tableId) - .timestamp(Instant.now()) + .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) .operation(commitReport.operation()) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java index cda2ca2fc7..7391ab9533 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -22,6 +22,7 @@ import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; import java.time.Instant; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.iceberg.catalog.TableIdentifier; @@ -111,13 +112,38 @@ public void reportMetric( PolarisBaseEntity catalogEntity = catalogResult.getEntity(); long catalogId = catalogEntity.getId(); - // Look up the table entity to get the table ID - // Build the path from catalog to table through namespace - List catalogPath = List.of(PolarisEntity.toCore(catalogEntity)); + // Build the full path from catalog through namespace to resolve the table. + // The path contains the catalog, then each namespace level. 
+ // The last element in the path becomes the parent for the lookup. + List entityPath = new ArrayList<>(); + entityPath.add(PolarisEntity.toCore(catalogEntity)); + + // Resolve each namespace level + String[] namespaceLevels = table.namespace().levels(); + for (String nsLevel : namespaceLevels) { + EntityResult nsResult = + metaStoreManager.readEntityByName( + callContext.getPolarisCallContext(), + entityPath, + PolarisEntityType.NAMESPACE, + PolarisEntitySubType.ANY_SUBTYPE, + nsLevel); + + if (!nsResult.isSuccess()) { + LOGGER.warn( + "Failed to find namespace '{}' in catalog '{}' for metrics persistence. Metrics will not be stored.", + nsLevel, + catalogName); + return; + } + entityPath.add(PolarisEntity.toCore(nsResult.getEntity())); + } + + // Now look up the table with the full namespace path EntityResult tableResult = metaStoreManager.readEntityByName( callContext.getPolarisCallContext(), - catalogPath, + entityPath, PolarisEntityType.TABLE_LIKE, PolarisEntitySubType.ANY_SUBTYPE, table.name()); @@ -131,7 +157,7 @@ public void reportMetric( } long tableId = tableResult.getEntity().getId(); - List namespace = Arrays.asList(table.namespace().levels()); + List namespace = Arrays.asList(namespaceLevels); if (metricsReport instanceof ScanReport scanReport) { ScanMetricsRecord record = @@ -139,6 +165,7 @@ public void reportMetric( .catalogId(catalogId) .tableId(tableId) .namespace(namespace) + .timestamp(receivedTimestamp) .build(); persistence.writeScanReport(record); LOGGER.debug( @@ -149,6 +176,7 @@ public void reportMetric( .catalogId(catalogId) .tableId(tableId) .namespace(namespace) + .timestamp(receivedTimestamp) .build(); persistence.writeCommitReport(record); LOGGER.debug( diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java index 6a3feace2e..c33dc7bfbc 100644 --- 
a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java @@ -108,6 +108,25 @@ void testReportScanMetrics() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup namespace lookups - "db" and "schema" + PolarisBaseEntity dbNamespaceEntity = createNamespaceEntity(11111L, "db", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("db"))) + .thenReturn(new EntityResult(dbNamespaceEntity)); + + PolarisBaseEntity schemaNamespaceEntity = createNamespaceEntity(22222L, "schema", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("schema"))) + .thenReturn(new EntityResult(schemaNamespaceEntity)); + // Setup table lookup PolarisBaseEntity tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); when(metaStoreManager.readEntityByName( @@ -147,6 +166,25 @@ void testReportCommitMetrics() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup namespace lookups - "db" and "schema" + PolarisBaseEntity dbNamespaceEntity = createNamespaceEntity(11111L, "db", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("db"))) + .thenReturn(new EntityResult(dbNamespaceEntity)); + + PolarisBaseEntity schemaNamespaceEntity = createNamespaceEntity(22222L, "schema", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("schema"))) + .thenReturn(new EntityResult(schemaNamespaceEntity)); + // Setup table lookup PolarisBaseEntity 
tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); when(metaStoreManager.readEntityByName( @@ -208,6 +246,25 @@ void testUnknownReportType() { eq(CATALOG_NAME))) .thenReturn(new EntityResult(catalogEntity)); + // Setup namespace lookups - "db" and "schema" + PolarisBaseEntity dbNamespaceEntity = createNamespaceEntity(11111L, "db", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("db"))) + .thenReturn(new EntityResult(dbNamespaceEntity)); + + PolarisBaseEntity schemaNamespaceEntity = createNamespaceEntity(22222L, "schema", CATALOG_ID); + when(metaStoreManager.readEntityByName( + eq(polarisCallContext), + any(), + eq(PolarisEntityType.NAMESPACE), + eq(PolarisEntitySubType.ANY_SUBTYPE), + eq("schema"))) + .thenReturn(new EntityResult(schemaNamespaceEntity)); + // Setup table lookup PolarisBaseEntity tableEntity = createTableEntity(TABLE_ID, TABLE_NAME, CATALOG_ID); when(metaStoreManager.readEntityByName( @@ -241,6 +298,18 @@ private PolarisBaseEntity createCatalogEntity(long id, String name) { .build(); } + private PolarisBaseEntity createNamespaceEntity(long id, String name, long catalogId) { + return new PolarisBaseEntity.Builder() + .catalogId(catalogId) + .id(id) + .parentId(catalogId) // Parent is the catalog for simplicity + .typeCode(PolarisEntityType.NAMESPACE.getCode()) + .subTypeCode(PolarisEntitySubType.NULL_SUBTYPE.getCode()) + .name(name) + .entityVersion(1) + .build(); + } + private PolarisBaseEntity createTableEntity(long id, String name, long catalogId) { return new PolarisBaseEntity.Builder() .catalogId(catalogId) From 65e8cb439ba231f760964c269f42a198b51633fb Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 15:44:36 -0800 Subject: [PATCH 51/67] feat: Add separate metrics schema with --include-metrics bootstrap option - Create schema-metrics-v1.sql for H2 and PostgreSQL with independent 
version tracking - Add --include-metrics CLI option to BootstrapCommand - Add openMetricsSchemaResource() to DatabaseType for loading metrics schema - Update JdbcMetaStoreManagerFactory to optionally load metrics schema during bootstrap This allows metrics schema to evolve independently from the entity schema. --- .../relational/jdbc/DatabaseType.java | 22 +- .../relational/jdbc/JdbcBootstrapUtils.java | 14 ++ .../jdbc/JdbcMetaStoreManagerFactory.java | 7 + .../main/resources/h2/schema-metrics-v1.sql | 196 +++++++++++++++++ .../src/main/resources/h2/schema-v4.sql | 2 +- .../resources/postgres/schema-metrics-v1.sql | 207 ++++++++++++++++++ .../src/main/resources/postgres/schema-v4.sql | 2 +- .../persistence/bootstrap/SchemaOptions.java | 7 + .../polaris/admintool/BootstrapCommand.java | 9 + 9 files changed, 463 insertions(+), 3 deletions(-) create mode 100644 persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql create mode 100644 persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java index 1ae1b35a55..28b77ad8c2 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java @@ -50,7 +50,7 @@ public static DatabaseType fromDisplayName(String displayName) { */ public InputStream openInitScriptResource(int schemaVersion) { // Preconditions check is simpler and more direct than a switch default - if (schemaVersion <= 0 || schemaVersion > 3) { + if (schemaVersion <= 0 || schemaVersion > 4) { throw new IllegalArgumentException("Unknown or invalid schema version " + schemaVersion); } @@ -60,4 +60,24 @@ public InputStream 
openInitScriptResource(int schemaVersion) { ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); return classLoader.getResourceAsStream(resourceName); } + + /** + * Open an InputStream that contains data from the metrics schema init script. This stream should + * be closed by the caller. + * + * @param metricsSchemaVersion the metrics schema version (currently only 1 is supported) + * @return an InputStream for the metrics schema SQL file + */ + public InputStream openMetricsSchemaResource(int metricsSchemaVersion) { + if (metricsSchemaVersion != 1) { + throw new IllegalArgumentException( + "Unknown or invalid metrics schema version " + metricsSchemaVersion); + } + + final String resourceName = + String.format("%s/schema-metrics-v%d.sql", this.getDisplayName(), metricsSchemaVersion); + + ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); + return classLoader.getResourceAsStream(resourceName); + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java index 814417d1b8..a6e691cd01 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java @@ -88,4 +88,18 @@ public static int getRequestedSchemaVersion(BootstrapOptions bootstrapOptions) { } return -1; } + + /** + * Determines whether the metrics schema should be included during bootstrap. + * + * @param bootstrapOptions The bootstrap options containing schema information. + * @return true if the metrics schema should be included, false otherwise. 
+ */ + public static boolean shouldIncludeMetrics(BootstrapOptions bootstrapOptions) { + SchemaOptions schemaOptions = bootstrapOptions.schemaOptions(); + if (schemaOptions != null) { + return schemaOptions.includeMetrics(); + } + return false; + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java index 26f38fc31b..24c1d025ca 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java @@ -172,6 +172,13 @@ public synchronized Map bootstrapRealms( datasourceOperations .getDatabaseType() .openInitScriptResource(effectiveSchemaVersion)); + + // Run the metrics schema script if requested + if (JdbcBootstrapUtils.shouldIncludeMetrics(bootstrapOptions)) { + LOGGER.info("Including metrics schema for realm: {}", realm); + datasourceOperations.executeScript( + datasourceOperations.getDatabaseType().openMetricsSchemaResource(1)); + } } catch (SQLException e) { throw new RuntimeException( String.format("Error executing sql script: %s", e.getMessage()), e); diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql new file mode 100644 index 0000000000..29e7ea0644 --- /dev/null +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql @@ -0,0 +1,196 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +-- ============================================================================ +-- POLARIS METRICS SCHEMA VERSION 1 (H2) +-- ============================================================================ +-- This schema is SEPARATE from the entity schema and can evolve independently. +-- It contains tables for storing Iceberg metrics reports. +-- +-- Tables: +-- * `metrics_version` - Version tracking for the metrics schema +-- * `scan_metrics_report` - Scan metrics reports +-- * `scan_metrics_report_roles` - Junction table for principal roles +-- * `commit_metrics_report` - Commit metrics reports +-- * `commit_metrics_report_roles` - Junction table for principal roles +-- ============================================================================ + +CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; +SET SCHEMA POLARIS_SCHEMA; + +-- Metrics schema version tracking (separate from entity schema version) +CREATE TABLE IF NOT EXISTS metrics_version ( + version_key VARCHAR PRIMARY KEY, + version_value INTEGER NOT NULL +); + +MERGE INTO metrics_version (version_key, version_value) + KEY (version_key) + VALUES ('metrics_version', 1); + +COMMENT ON TABLE metrics_version IS 'the version of the metrics schema in use'; + +-- ============================================================================ +-- SCAN METRICS REPORT TABLE +-- 
============================================================================ + +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + projected_field_ids TEXT, + projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms); + +-- Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES 
scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; + +-- ============================================================================ +-- COMMIT METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER DEFAULT 1, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; + +-- Index for retention cleanup by timestamp 
+CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; + diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index 0f2ac75cc6..1e766a6969 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS version ( MERGE INTO version (version_key, version_value) KEY (version_key) - VALUES ('version', 3); + VALUES ('version', 4); -- H2 supports COMMENT, but some modes may ignore it COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql new file mode 100644 index 0000000000..8da86489ea --- /dev/null +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql @@ -0,0 +1,207 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- ============================================================================ +-- POLARIS METRICS SCHEMA VERSION 1 (PostgreSQL) +-- ============================================================================ +-- This schema is SEPARATE from the entity schema and can evolve independently. +-- It contains tables for storing Iceberg metrics reports. +-- +-- Tables: +-- * `metrics_version` - Version tracking for the metrics schema +-- * `scan_metrics_report` - Scan metrics reports +-- * `scan_metrics_report_roles` - Junction table for principal roles +-- * `commit_metrics_report` - Commit metrics reports +-- * `commit_metrics_report_roles` - Junction table for principal roles +-- ============================================================================ + +CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; +SET search_path TO POLARIS_SCHEMA; + +-- Metrics schema version tracking (separate from entity schema version) +CREATE TABLE IF NOT EXISTS metrics_version ( + version_key TEXT PRIMARY KEY, + version_value INTEGER NOT NULL +); + +INSERT INTO metrics_version (version_key, version_value) +VALUES ('metrics_version', 1) +ON CONFLICT (version_key) DO UPDATE +SET version_value = EXCLUDED.version_value; + +COMMENT ON TABLE metrics_version IS 'the version of the metrics schema in use'; + +-- ============================================================================ +-- SCAN METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + 
realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + projected_field_ids TEXT, + projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; +COMMENT ON COLUMN scan_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog ID'; +COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; +COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID from report metadata'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp + ON scan_metrics_report(realm_id, timestamp_ms DESC); + +-- 
Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; + +-- ============================================================================ +-- COMMIT METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER 
DEFAULT 1, + + -- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; +COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; +COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp + ON commit_metrics_report(realm_id, timestamp_ms DESC); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; + diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 47d4ea8834..b1a5b5870b 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS version ( version_value INTEGER NOT NULL ); INSERT INTO version (version_key, version_value) -VALUES ('version', 3) +VALUES ('version', 4) ON CONFLICT (version_key) DO UPDATE SET version_value = EXCLUDED.version_value; COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; diff --git 
a/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java index 5cfc20a889..8798a66f93 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java @@ -21,8 +21,15 @@ import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; @PolarisImmutable public interface SchemaOptions { Optional schemaVersion(); + + /** Whether to include the metrics schema during bootstrap. Defaults to false. */ + @Value.Default + default boolean includeMetrics() { + return false; + } } diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java index 82d92f4e18..53c89ddd5c 100644 --- a/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java @@ -93,6 +93,11 @@ static class SchemaInputOptions { paramLabel = "", description = "The version of the schema to load in [1, 2, 3, LATEST].") Integer schemaVersion; + + @CommandLine.Option( + names = {"--include-metrics"}, + description = "Include metrics schema tables during bootstrap.") + boolean includeMetrics; } } @@ -136,6 +141,10 @@ public Integer call() { builder.schemaVersion(inputOptions.schemaInputOptions.schemaVersion); } + if (inputOptions.schemaInputOptions.includeMetrics) { + builder.includeMetrics(true); + } + schemaOptions = builder.build(); } else { schemaOptions = ImmutableSchemaOptions.builder().build(); From fe2a7edb234884c1e154b3ba6f909c670c1cd574 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 15:58:50 -0800 Subject: [PATCH 52/67] chore: Sync with 
feat-3337-metrics-persistence-spi and feat-3337-schema-v4 branches Update SPI interfaces and schema files from their respective source branches: - SPI: Updated MetricsRecordConverter with timestamp() builder method - Schema: Added schema-metrics-v1.sql and --include-metrics bootstrap option --- .../relational/jdbc/DatabaseType.java | 22 +- .../relational/jdbc/JdbcBootstrapUtils.java | 14 ++ .../jdbc/JdbcMetaStoreManagerFactory.java | 23 +- .../main/resources/h2/schema-metrics-v1.sql | 196 +++++++++++++++++ .../src/main/resources/h2/schema-v4.sql | 198 +---------------- .../resources/postgres/schema-metrics-v1.sql | 207 ++++++++++++++++++ .../src/main/resources/postgres/schema-v4.sql | 188 +--------------- .../persistence/bootstrap/SchemaOptions.java | 7 + .../metrics/CommitMetricsRecord.java | 5 + .../metrics/MetricsPersistence.java | 5 + .../metrics/MetricsQueryCriteria.java | 5 + .../metrics/MetricsRecordIdentity.java | 5 + .../persistence/metrics/ReportIdToken.java | 5 + .../metrics/ScanMetricsRecord.java | 5 + .../polaris/admintool/BootstrapCommand.java | 9 + 15 files changed, 499 insertions(+), 395 deletions(-) create mode 100644 persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql create mode 100644 persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java index 1ae1b35a55..28b77ad8c2 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java @@ -50,7 +50,7 @@ public static DatabaseType fromDisplayName(String displayName) { */ public InputStream openInitScriptResource(int schemaVersion) { // Preconditions check is simpler and 
more direct than a switch default - if (schemaVersion <= 0 || schemaVersion > 3) { + if (schemaVersion <= 0 || schemaVersion > 4) { throw new IllegalArgumentException("Unknown or invalid schema version " + schemaVersion); } @@ -60,4 +60,24 @@ public InputStream openInitScriptResource(int schemaVersion) { ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); return classLoader.getResourceAsStream(resourceName); } + + /** + * Open an InputStream that contains data from the metrics schema init script. This stream should + * be closed by the caller. + * + * @param metricsSchemaVersion the metrics schema version (currently only 1 is supported) + * @return an InputStream for the metrics schema SQL file + */ + public InputStream openMetricsSchemaResource(int metricsSchemaVersion) { + if (metricsSchemaVersion != 1) { + throw new IllegalArgumentException( + "Unknown or invalid metrics schema version " + metricsSchemaVersion); + } + + final String resourceName = + String.format("%s/schema-metrics-v%d.sql", this.getDisplayName(), metricsSchemaVersion); + + ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); + return classLoader.getResourceAsStream(resourceName); + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java index 814417d1b8..a6e691cd01 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtils.java @@ -88,4 +88,18 @@ public static int getRequestedSchemaVersion(BootstrapOptions bootstrapOptions) { } return -1; } + + /** + * Determines whether the metrics schema should be included during bootstrap. 
+ * + * @param bootstrapOptions The bootstrap options containing schema information. + * @return true if the metrics schema should be included, false otherwise. + */ + public static boolean shouldIncludeMetrics(BootstrapOptions bootstrapOptions) { + SchemaOptions schemaOptions = bootstrapOptions.schemaOptions(); + if (schemaOptions != null) { + return schemaOptions.includeMetrics(); + } + return false; + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java index a52e2fdca2..24c1d025ca 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java @@ -52,7 +52,6 @@ import org.apache.polaris.core.persistence.cache.InMemoryEntityCache; import org.apache.polaris.core.persistence.dao.entity.BaseResult; import org.apache.polaris.core.persistence.dao.entity.PrincipalSecretsResult; -import org.apache.polaris.core.persistence.metrics.MetricsPersistence; import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -173,6 +172,13 @@ public synchronized Map bootstrapRealms( datasourceOperations .getDatabaseType() .openInitScriptResource(effectiveSchemaVersion)); + + // Run the metrics schema script if requested + if (JdbcBootstrapUtils.shouldIncludeMetrics(bootstrapOptions)) { + LOGGER.info("Including metrics schema for realm: {}", realm); + datasourceOperations.executeScript( + datasourceOperations.getDatabaseType().openMetricsSchemaResource(1)); + } } catch (SQLException e) { throw new RuntimeException( String.format("Error executing sql script: %s", e.getMessage()), e); @@ -248,21 +254,6 @@ public 
synchronized EntityCache getOrCreateEntityCache( return entityCacheMap.get(realmContext.getRealmIdentifier()); } - @Override - public synchronized MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext) { - // Ensure the session is initialized for this realm - BasePersistence session = getOrCreateSession(realmContext); - - if (session instanceof JdbcBasePersistenceImpl jdbcPersistence) { - // Return JDBC-specific metrics persistence if schema version supports it - // The JdbcMetricsPersistence will gracefully handle unsupported schemas - return new JdbcMetricsPersistence(jdbcPersistence, realmContext.getRealmIdentifier()); - } - - // Fallback to no-op for non-JDBC sessions (shouldn't happen in JDBC factory) - return MetricsPersistence.NOOP; - } - /** * In this method we check if Service was bootstrapped for a given realm, i.e. that all the * entities were created (root principal, root principal role, etc) If service was not diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql new file mode 100644 index 0000000000..29e7ea0644 --- /dev/null +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql @@ -0,0 +1,196 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. 
See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +-- ============================================================================ +-- POLARIS METRICS SCHEMA VERSION 1 (H2) +-- ============================================================================ +-- This schema is SEPARATE from the entity schema and can evolve independently. +-- It contains tables for storing Iceberg metrics reports. +-- +-- Tables: +-- * `metrics_version` - Version tracking for the metrics schema +-- * `scan_metrics_report` - Scan metrics reports +-- * `scan_metrics_report_roles` - Junction table for principal roles +-- * `commit_metrics_report` - Commit metrics reports +-- * `commit_metrics_report_roles` - Junction table for principal roles +-- ============================================================================ + +CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; +SET SCHEMA POLARIS_SCHEMA; + +-- Metrics schema version tracking (separate from entity schema version) +CREATE TABLE IF NOT EXISTS metrics_version ( + version_key VARCHAR PRIMARY KEY, + version_value INTEGER NOT NULL +); + +MERGE INTO metrics_version (version_key, version_value) + KEY (version_key) + VALUES ('metrics_version', 1); + +COMMENT ON TABLE metrics_version IS 'the version of the metrics schema in use'; + +-- ============================================================================ +-- SCAN METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + 
projected_field_ids TEXT, + projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms); + +-- Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; + +-- ============================================================================ +-- COMMIT METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + 
-- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER DEFAULT 1, + + -- Additional metadata (for extensibility) + metadata TEXT DEFAULT '{}', + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; 
+ diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index fd2bc29b50..1e766a6969 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -17,14 +17,9 @@ -- under the License. -- --- Changes from v3: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles --- ============================================================================ +-- Changes from v2: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET SCHEMA POLARIS_SCHEMA; @@ -127,10 +122,6 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); --- ============================================================================ --- EVENTS TABLE (NEW in v4) --- ============================================================================ - CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -169,185 +160,4 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( ); CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires - ON idempotency_records (realm_id, expires_at); - --- ============================================================================ --- IDEMPOTENCY RECORDS TABLE (NEW in v4) --- ============================================================================ - -CREATE TABLE IF NOT EXISTS 
idempotency_records ( - realm_id TEXT NOT NULL, - idempotency_key TEXT NOT NULL, - operation_type TEXT NOT NULL, - resource_id TEXT NOT NULL, -- normalized request-derived resource identifier (not a generated entity id) - - -- Finalization/replay - http_status INTEGER, -- NULL while IN_PROGRESS; set only on finalized 2xx/terminal 4xx - error_subtype TEXT, -- optional: e.g., already_exists, namespace_not_empty, idempotency_replay_failed - response_summary TEXT, -- minimal body to reproduce equivalent response (JSON string) - response_headers TEXT, -- small whitelisted headers to replay (JSON string) - finalized_at TIMESTAMP, -- when http_status was written - - -- Liveness/ops - created_at TIMESTAMP NOT NULL, - updated_at TIMESTAMP NOT NULL, - heartbeat_at TIMESTAMP, -- updated by owner while IN_PROGRESS - executor_id TEXT, -- owner pod/worker id - expires_at TIMESTAMP, - - PRIMARY KEY (realm_id, idempotency_key) -); - -CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires - ON idempotency_records (realm_id, expires_at); - --- ============================================================================ --- METRICS TABLES (NEW in v4) --- ============================================================================ - --- Scan Metrics Report Table -CREATE TABLE IF NOT EXISTS scan_metrics_report ( - report_id TEXT NOT NULL, - realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, - - -- Report metadata - timestamp_ms BIGINT NOT NULL, - principal_name TEXT, - request_id TEXT, - - -- Trace correlation - otel_trace_id TEXT, - otel_span_id TEXT, - report_trace_id TEXT, - - -- Scan context - snapshot_id BIGINT, - schema_id INTEGER, - filter_expression TEXT, - projected_field_ids TEXT, - projected_field_names TEXT, - - -- Scan metrics - result_data_files BIGINT DEFAULT 0, - result_delete_files BIGINT DEFAULT 0, - total_file_size_bytes BIGINT DEFAULT 0, - total_data_manifests BIGINT DEFAULT 0, - total_delete_manifests BIGINT 
DEFAULT 0, - scanned_data_manifests BIGINT DEFAULT 0, - scanned_delete_manifests BIGINT DEFAULT 0, - skipped_data_manifests BIGINT DEFAULT 0, - skipped_delete_manifests BIGINT DEFAULT 0, - skipped_data_files BIGINT DEFAULT 0, - skipped_delete_files BIGINT DEFAULT 0, - total_planning_duration_ms BIGINT DEFAULT 0, - - -- Equality/positional delete metrics - equality_delete_files BIGINT DEFAULT 0, - positional_delete_files BIGINT DEFAULT 0, - indexed_delete_files BIGINT DEFAULT 0, - total_delete_file_size_bytes BIGINT DEFAULT 0, - - -- Additional metadata (for extensibility) - metadata TEXT DEFAULT '{}', - - PRIMARY KEY (realm_id, report_id) -); - -COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; - --- Indexes for scan_metrics_report --- Note: Additional indexes for query patterns (by table, trace_id, principal) can be added --- when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. -CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms); - --- Junction table for scan metrics report roles -CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; - - --- Commit Metrics Report Entity Table -CREATE TABLE IF NOT EXISTS commit_metrics_report ( - report_id TEXT NOT NULL, - realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, - - -- Report metadata - timestamp_ms BIGINT NOT NULL, - principal_name TEXT, - request_id TEXT, - - -- Trace correlation - otel_trace_id TEXT, - otel_span_id TEXT, - report_trace_id TEXT, - - -- Commit context - snapshot_id BIGINT NOT 
NULL, - sequence_number BIGINT, - operation TEXT NOT NULL, - - -- File metrics - added_data_files BIGINT DEFAULT 0, - removed_data_files BIGINT DEFAULT 0, - total_data_files BIGINT DEFAULT 0, - added_delete_files BIGINT DEFAULT 0, - removed_delete_files BIGINT DEFAULT 0, - total_delete_files BIGINT DEFAULT 0, - - -- Equality delete files - added_equality_delete_files BIGINT DEFAULT 0, - removed_equality_delete_files BIGINT DEFAULT 0, - - -- Positional delete files - added_positional_delete_files BIGINT DEFAULT 0, - removed_positional_delete_files BIGINT DEFAULT 0, - - -- Record metrics - added_records BIGINT DEFAULT 0, - removed_records BIGINT DEFAULT 0, - total_records BIGINT DEFAULT 0, - - -- Size metrics - added_file_size_bytes BIGINT DEFAULT 0, - removed_file_size_bytes BIGINT DEFAULT 0, - total_file_size_bytes BIGINT DEFAULT 0, - - -- Duration and attempts - total_duration_ms BIGINT DEFAULT 0, - attempts INTEGER DEFAULT 1, - - -- Additional metadata (for extensibility) - metadata TEXT DEFAULT '{}', - - PRIMARY KEY (realm_id, report_id) -); - -COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; - --- Indexes for commit_metrics_report --- Note: Additional indexes for query patterns (by table, trace_id, principal, operation, snapshot) --- can be added when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. 
-CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms); - --- Junction table for commit metrics report roles -CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; + ON idempotency_records (realm_id, expires_at); \ No newline at end of file diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql new file mode 100644 index 0000000000..8da86489ea --- /dev/null +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql @@ -0,0 +1,207 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +-- ============================================================================ +-- POLARIS METRICS SCHEMA VERSION 1 (PostgreSQL) +-- ============================================================================ +-- This schema is SEPARATE from the entity schema and can evolve independently. +-- It contains tables for storing Iceberg metrics reports. +-- +-- Tables: +-- * `metrics_version` - Version tracking for the metrics schema +-- * `scan_metrics_report` - Scan metrics reports +-- * `scan_metrics_report_roles` - Junction table for principal roles +-- * `commit_metrics_report` - Commit metrics reports +-- * `commit_metrics_report_roles` - Junction table for principal roles +-- ============================================================================ + +CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; +SET search_path TO POLARIS_SCHEMA; + +-- Metrics schema version tracking (separate from entity schema version) +CREATE TABLE IF NOT EXISTS metrics_version ( + version_key TEXT PRIMARY KEY, + version_value INTEGER NOT NULL +); + +INSERT INTO metrics_version (version_key, version_value) +VALUES ('metrics_version', 1) +ON CONFLICT (version_key) DO UPDATE +SET version_value = EXCLUDED.version_value; + +COMMENT ON TABLE metrics_version IS 'the version of the metrics schema in use'; + +-- ============================================================================ +-- SCAN METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS scan_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Scan context + snapshot_id BIGINT, + schema_id INTEGER, + filter_expression TEXT, + projected_field_ids TEXT, + 
projected_field_names TEXT, + + -- Scan metrics + result_data_files BIGINT DEFAULT 0, + result_delete_files BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + total_data_manifests BIGINT DEFAULT 0, + total_delete_manifests BIGINT DEFAULT 0, + scanned_data_manifests BIGINT DEFAULT 0, + scanned_delete_manifests BIGINT DEFAULT 0, + skipped_data_manifests BIGINT DEFAULT 0, + skipped_delete_manifests BIGINT DEFAULT 0, + skipped_data_files BIGINT DEFAULT 0, + skipped_delete_files BIGINT DEFAULT 0, + total_planning_duration_ms BIGINT DEFAULT 0, + + -- Equality/positional delete metrics + equality_delete_files BIGINT DEFAULT 0, + positional_delete_files BIGINT DEFAULT 0, + indexed_delete_files BIGINT DEFAULT 0, + total_delete_file_size_bytes BIGINT DEFAULT 0, + + -- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; +COMMENT ON COLUMN scan_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog ID'; +COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; +COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID from report metadata'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp + ON scan_metrics_report(realm_id, timestamp_ms DESC); + +-- Junction table for scan metrics report roles +CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for 
scan metrics reports'; + +-- ============================================================================ +-- COMMIT METRICS REPORT TABLE +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS commit_metrics_report ( + report_id TEXT NOT NULL, + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + namespace TEXT NOT NULL, + table_id BIGINT NOT NULL, + + -- Report metadata + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + request_id TEXT, + + -- Trace correlation + otel_trace_id TEXT, + otel_span_id TEXT, + report_trace_id TEXT, + + -- Commit context + snapshot_id BIGINT NOT NULL, + sequence_number BIGINT, + operation TEXT NOT NULL, + + -- File metrics + added_data_files BIGINT DEFAULT 0, + removed_data_files BIGINT DEFAULT 0, + total_data_files BIGINT DEFAULT 0, + added_delete_files BIGINT DEFAULT 0, + removed_delete_files BIGINT DEFAULT 0, + total_delete_files BIGINT DEFAULT 0, + + -- Equality delete files + added_equality_delete_files BIGINT DEFAULT 0, + removed_equality_delete_files BIGINT DEFAULT 0, + + -- Positional delete files + added_positional_delete_files BIGINT DEFAULT 0, + removed_positional_delete_files BIGINT DEFAULT 0, + + -- Record metrics + added_records BIGINT DEFAULT 0, + removed_records BIGINT DEFAULT 0, + total_records BIGINT DEFAULT 0, + + -- Size metrics + added_file_size_bytes BIGINT DEFAULT 0, + removed_file_size_bytes BIGINT DEFAULT 0, + total_file_size_bytes BIGINT DEFAULT 0, + + -- Duration and attempts + total_duration_ms BIGINT DEFAULT 0, + attempts INTEGER DEFAULT 1, + + -- Additional metadata (for extensibility) + metadata JSONB DEFAULT '{}'::JSONB, + + PRIMARY KEY (realm_id, report_id) +); + +COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; +COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the report'; +COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; +COMMENT ON 
COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; +COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; + +-- Index for retention cleanup by timestamp +CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp + ON commit_metrics_report(realm_id, timestamp_ms DESC); + +-- Junction table for commit metrics report roles +CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( + realm_id TEXT NOT NULL, + report_id TEXT NOT NULL, + role_name TEXT NOT NULL, + PRIMARY KEY (realm_id, report_id, role_name), + FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE +); + +COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; + diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index 530fc969d9..b1a5b5870b 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -16,20 +16,9 @@ -- specific language governing permissions and limitations -- under the License. --- ============================================================================ --- POLARIS JDBC SCHEMA VERSION 4 (PostgreSQL) --- ============================================================================ --- This schema is SELF-CONTAINED and can be used for fresh installs. --- Each schema version includes ALL tables, not just incremental changes. 
--- --- Changes from v3: --- * Added `events` table --- * Added `idempotency_records` table for REST idempotency --- * Added `scan_metrics_report` table for scan metrics as first-class entities --- * Added `scan_metrics_report_roles` junction table for principal roles --- * Added `commit_metrics_report` table for commit metrics as first-class entities --- * Added `commit_metrics_report_roles` junction table for principal roles --- ============================================================================ +-- Changes from v2: +-- * Added `events` table +-- * Added `idempotency_records` table for REST idempotency CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; SET search_path TO POLARIS_SCHEMA; @@ -133,10 +122,6 @@ CREATE TABLE IF NOT EXISTS policy_mapping_record ( CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); --- ============================================================================ --- EVENTS TABLE (NEW in v4) --- ============================================================================ - CREATE TABLE IF NOT EXISTS events ( realm_id TEXT NOT NULL, catalog_id TEXT NOT NULL, @@ -151,10 +136,7 @@ CREATE TABLE IF NOT EXISTS events ( PRIMARY KEY (event_id) ); --- ============================================================================ --- IDEMPOTENCY RECORDS TABLE (NEW in v4) --- ============================================================================ - +-- Idempotency records (key-only idempotency; durable replay) CREATE TABLE IF NOT EXISTS idempotency_records ( realm_id TEXT NOT NULL, idempotency_key TEXT NOT NULL, @@ -181,165 +163,3 @@ CREATE TABLE IF NOT EXISTS idempotency_records ( -- Helpful indexes CREATE INDEX IF NOT EXISTS idx_idemp_realm_expires ON idempotency_records (realm_id, expires_at); - --- ============================================================================ --- METRICS TABLES (NEW in v4) --- 
============================================================================ - --- Scan Metrics Report Table -CREATE TABLE IF NOT EXISTS scan_metrics_report ( - report_id TEXT NOT NULL, - realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, - - -- Report metadata - timestamp_ms BIGINT NOT NULL, - principal_name TEXT, - request_id TEXT, - - -- Trace correlation - otel_trace_id TEXT, - otel_span_id TEXT, - report_trace_id TEXT, - - -- Scan context - snapshot_id BIGINT, - schema_id INTEGER, - filter_expression TEXT, - projected_field_ids TEXT, - projected_field_names TEXT, - - -- Scan metrics - result_data_files BIGINT DEFAULT 0, - result_delete_files BIGINT DEFAULT 0, - total_file_size_bytes BIGINT DEFAULT 0, - total_data_manifests BIGINT DEFAULT 0, - total_delete_manifests BIGINT DEFAULT 0, - scanned_data_manifests BIGINT DEFAULT 0, - scanned_delete_manifests BIGINT DEFAULT 0, - skipped_data_manifests BIGINT DEFAULT 0, - skipped_delete_manifests BIGINT DEFAULT 0, - skipped_data_files BIGINT DEFAULT 0, - skipped_delete_files BIGINT DEFAULT 0, - total_planning_duration_ms BIGINT DEFAULT 0, - - -- Equality/positional delete metrics - equality_delete_files BIGINT DEFAULT 0, - positional_delete_files BIGINT DEFAULT 0, - indexed_delete_files BIGINT DEFAULT 0, - total_delete_file_size_bytes BIGINT DEFAULT 0, - - -- Additional metadata (for extensibility) - metadata JSONB DEFAULT '{}'::JSONB, - - PRIMARY KEY (realm_id, report_id) -); - -COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class entities'; -COMMENT ON COLUMN scan_metrics_report.report_id IS 'Unique identifier for the report'; -COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; -COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog ID'; -COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; -COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID 
from report metadata'; - --- Indexes for scan_metrics_report --- Note: Additional indexes for query patterns (by table, trace_id, principal) can be added --- when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. -CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp - ON scan_metrics_report(realm_id, timestamp_ms DESC); - --- Junction table for scan metrics report roles -CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; - - --- Commit Metrics Report Entity Table -CREATE TABLE IF NOT EXISTS commit_metrics_report ( - report_id TEXT NOT NULL, - realm_id TEXT NOT NULL, - catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, - table_id BIGINT NOT NULL, - - -- Report metadata - timestamp_ms BIGINT NOT NULL, - principal_name TEXT, - request_id TEXT, - - -- Trace correlation - otel_trace_id TEXT, - otel_span_id TEXT, - report_trace_id TEXT, - - -- Commit context - snapshot_id BIGINT NOT NULL, - sequence_number BIGINT, - operation TEXT NOT NULL, - - -- File metrics - added_data_files BIGINT DEFAULT 0, - removed_data_files BIGINT DEFAULT 0, - total_data_files BIGINT DEFAULT 0, - added_delete_files BIGINT DEFAULT 0, - removed_delete_files BIGINT DEFAULT 0, - total_delete_files BIGINT DEFAULT 0, - - -- Equality delete files - added_equality_delete_files BIGINT DEFAULT 0, - removed_equality_delete_files BIGINT DEFAULT 0, - - -- Positional delete files - added_positional_delete_files BIGINT DEFAULT 0, - removed_positional_delete_files BIGINT DEFAULT 0, - - -- Record metrics - added_records BIGINT DEFAULT 0, - removed_records BIGINT DEFAULT 0, - total_records BIGINT DEFAULT 0, - - -- Size 
metrics - added_file_size_bytes BIGINT DEFAULT 0, - removed_file_size_bytes BIGINT DEFAULT 0, - total_file_size_bytes BIGINT DEFAULT 0, - - -- Duration and attempts - total_duration_ms BIGINT DEFAULT 0, - attempts INTEGER DEFAULT 1, - - -- Additional metadata (for extensibility) - metadata JSONB DEFAULT '{}'::JSONB, - - PRIMARY KEY (realm_id, report_id) -); - -COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class entities'; -COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the report'; -COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; -COMMENT ON COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; -COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; - --- Indexes for commit_metrics_report --- Note: Additional indexes for query patterns (by table, trace_id, principal, operation, snapshot) --- can be added when analytics APIs are introduced. Currently only timestamp index is needed for retention cleanup. 
-CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp - ON commit_metrics_report(realm_id, timestamp_ms DESC); - --- Junction table for commit metrics report roles -CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java index 5cfc20a889..8798a66f93 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/bootstrap/SchemaOptions.java @@ -21,8 +21,15 @@ import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; @PolarisImmutable public interface SchemaOptions { Optional schemaVersion(); + + /** Whether to include the metrics schema during bootstrap. Defaults to false. 
*/ + @Value.Default + default boolean includeMetrics() { + return false; + } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 2986beb0fb..6d67408cba 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -31,7 +32,11 @@ * *

      Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained * from the CDI-injected {@code RealmContext} at persistence time. + * + *

      Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable public interface CommitMetricsRecord extends MetricsRecordIdentity { diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index ae7e6f7ec0..1e9865701e 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import jakarta.annotation.Nonnull; import org.apache.polaris.core.persistence.pagination.Page; import org.apache.polaris.core.persistence.pagination.PageToken; @@ -72,10 +73,14 @@ *

      The {@link ReportIdToken} provides a reference cursor implementation based on report ID * (UUID), but backends may use other cursor strategies internally. * + *

      Note: This SPI is currently experimental and not yet implemented in all persistence + * backends. The API may change in future releases. + * * @see PageToken * @see Page * @see ReportIdToken */ +@Beta public interface MetricsPersistence { /** A no-op implementation for backends that don't support metrics persistence. */ diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index a6bf10b952..210fa39096 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.time.Instant; import java.util.Map; import java.util.Optional; @@ -49,6 +50,9 @@ * the {@link #metadata()} filter map. Client-provided correlation data should be stored in the * metrics record's metadata map and can be filtered using the metadata criteria. * + *

      Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. + * *

      Pagination

      * *

      Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} @@ -66,6 +70,7 @@ * @see org.apache.polaris.core.persistence.pagination.Page * @see ReportIdToken */ +@Beta @PolarisImmutable public interface MetricsQueryCriteria { diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 7d31302e54..077df90b97 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.time.Instant; import java.util.List; import java.util.Map; @@ -45,7 +46,11 @@ *

      Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This * keeps catalog-specific code from needing to manage realm concerns. + * + *

      Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta public interface MetricsRecordIdentity { /** diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java index c4e4ec6320..f3e3846953 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.common.annotations.Beta; import jakarta.annotation.Nullable; import org.apache.polaris.core.persistence.pagination.Token; import org.apache.polaris.immutables.PolarisImmutable; @@ -54,7 +55,11 @@ *

    • NoSQL: Use report ID as partition/sort key cursor *
    • Time-series: Combine with timestamp for efficient range scans * + * + *

      Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable @JsonSerialize(as = ImmutableReportIdToken.class) @JsonDeserialize(as = ImmutableReportIdToken.class) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index b9fd79ec29..44947d8f75 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.util.List; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -32,7 +33,11 @@ * *

      Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained * from the CDI-injected {@code RealmContext} at persistence time. + * + *

      Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable public interface ScanMetricsRecord extends MetricsRecordIdentity { diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java index 82d92f4e18..53c89ddd5c 100644 --- a/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapCommand.java @@ -93,6 +93,11 @@ static class SchemaInputOptions { paramLabel = "", description = "The version of the schema to load in [1, 2, 3, LATEST].") Integer schemaVersion; + + @CommandLine.Option( + names = {"--include-metrics"}, + description = "Include metrics schema tables during bootstrap.") + boolean includeMetrics; } } @@ -136,6 +141,10 @@ public Integer call() { builder.schemaVersion(inputOptions.schemaInputOptions.schemaVersion); } + if (inputOptions.schemaInputOptions.includeMetrics) { + builder.includeMetrics(true); + } + schemaOptions = builder.build(); } else { schemaOptions = ImmutableSchemaOptions.builder().build(); From 3eb99124bcf68d198a8ac4faeea20fd8c8d6fafc Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 19:35:17 -0800 Subject: [PATCH 53/67] refactor: Denormalize principal roles into JSON array column Replace junction tables (scan_metrics_report_roles, commit_metrics_report_roles) with a denormalized principal_role_ids JSON array column in both scan_metrics_report and commit_metrics_report tables. This simplifies the schema and reduces the number of tables from 5 to 3. 
--- .../main/resources/h2/schema-metrics-v1.sql | 31 ++++------------- .../resources/postgres/schema-metrics-v1.sql | 33 +++++-------------- 2 files changed, 14 insertions(+), 50 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql index 29e7ea0644..0949fe59cd 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql @@ -26,9 +26,7 @@ -- Tables: -- * `metrics_version` - Version tracking for the metrics schema -- * `scan_metrics_report` - Scan metrics reports --- * `scan_metrics_report_roles` - Junction table for principal roles -- * `commit_metrics_report` - Commit metrics reports --- * `commit_metrics_report_roles` - Junction table for principal roles -- ============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; @@ -94,6 +92,9 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, + -- Principal roles (denormalized as JSON array) + principal_role_ids TEXT DEFAULT '[]', + -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', @@ -105,17 +106,6 @@ COMMENT ON TABLE scan_metrics_report IS 'Scan metrics reports as first-class ent -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms); --- Junction table for scan metrics report roles -CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan 
metrics reports'; - -- ============================================================================ -- COMMIT METRICS REPORT TABLE -- ============================================================================ @@ -172,6 +162,9 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( total_duration_ms BIGINT DEFAULT 0, attempts INTEGER DEFAULT 1, + -- Principal roles (denormalized as JSON array) + principal_role_ids TEXT DEFAULT '[]', + -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', @@ -182,15 +175,3 @@ COMMENT ON TABLE commit_metrics_report IS 'Commit metrics reports as first-class -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms); - --- Junction table for commit metrics report roles -CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; - diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql index 8da86489ea..596779ad89 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql @@ -25,9 +25,7 @@ -- Tables: -- * `metrics_version` - Version tracking for the metrics schema -- * `scan_metrics_report` - Scan metrics reports --- * `scan_metrics_report_roles` - Junction table for principal roles -- * `commit_metrics_report` - Commit metrics reports --- * `commit_metrics_report_roles` - Junction table for principal roles -- 
============================================================================ CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; @@ -94,6 +92,9 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, + -- Principal roles (denormalized as JSON array) + principal_role_ids JSONB DEFAULT '[]'::JSONB, + -- Additional metadata (for extensibility) metadata JSONB DEFAULT '{}'::JSONB, @@ -106,22 +107,12 @@ COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog ID'; COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID from report metadata'; +COMMENT ON COLUMN scan_metrics_report.principal_role_ids IS 'JSON array of activated principal role IDs'; -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp ON scan_metrics_report(realm_id, timestamp_ms DESC); --- Junction table for scan metrics report roles -CREATE TABLE IF NOT EXISTS scan_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES scan_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE scan_metrics_report_roles IS 'Activated principal roles for scan metrics reports'; - -- ============================================================================ -- COMMIT METRICS REPORT TABLE -- ============================================================================ @@ -178,6 +169,9 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( total_duration_ms BIGINT DEFAULT 0, attempts INTEGER DEFAULT 1, + -- Principal roles (denormalized as JSON array) + principal_role_ids JSONB DEFAULT '[]'::JSONB, + -- Additional metadata (for extensibility) metadata JSONB 
DEFAULT '{}'::JSONB, @@ -189,19 +183,8 @@ COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; COMMENT ON COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; +COMMENT ON COLUMN commit_metrics_report.principal_role_ids IS 'JSON array of activated principal role IDs'; -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp ON commit_metrics_report(realm_id, timestamp_ms DESC); - --- Junction table for commit metrics report roles -CREATE TABLE IF NOT EXISTS commit_metrics_report_roles ( - realm_id TEXT NOT NULL, - report_id TEXT NOT NULL, - role_name TEXT NOT NULL, - PRIMARY KEY (realm_id, report_id, role_name), - FOREIGN KEY (realm_id, report_id) REFERENCES commit_metrics_report(realm_id, report_id) ON DELETE CASCADE -); - -COMMENT ON TABLE commit_metrics_report_roles IS 'Activated principal roles for commit metrics reports'; - From 41bb8108f576b3bc13d59e1396b83c8f12aa6ed8 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 19:49:35 -0800 Subject: [PATCH 54/67] refactor: Remove metrics event types Remove BEFORE_REPORT_METRICS and AFTER_REPORT_METRICS event types as requested in PR review - events for metrics can be added in a separate PR if needed. 
--- .../iceberg/IcebergRestCatalogEventServiceDelegator.java | 1 + .../org/apache/polaris/service/events/PolarisEventType.java | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java index 950e218be6..00d0f6e08f 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergRestCatalogEventServiceDelegator.java @@ -350,6 +350,7 @@ public Response loadTable( snapshots, realmContext, securityContext); + polarisEventListener.onEvent( new PolarisEvent( PolarisEventType.AFTER_LOAD_TABLE, diff --git a/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java b/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java index a11d047b89..de6b994608 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/events/PolarisEventType.java @@ -193,11 +193,7 @@ public enum PolarisEventType { AFTER_ATTEMPT_TASK(1401), // Rate Limiting Events - BEFORE_LIMIT_REQUEST_RATE(1500), - - // Metrics Reporting Events - BEFORE_REPORT_METRICS(1600), - AFTER_REPORT_METRICS(1601); + BEFORE_LIMIT_REQUEST_RATE(1500); private final int code; From 5d831cdad6e6709eb7af2b07e1d53345b3dfa0a3 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 19:57:06 -0800 Subject: [PATCH 55/67] refactor: Decouple MetricsPersistence from MetaStoreManagerFactory Per PR review feedback, MetricsPersistence is now injected directly via CDI instead of being obtained through MetaStoreManagerFactory.getOrCreateMetricsPersistence(). 
Changes: - Remove getOrCreateMetricsPersistence() from MetaStoreManagerFactory interface - Add @RequestScoped CDI producer for MetricsPersistence in ServiceProducers - Update PersistingMetricsReporter to inject MetricsPersistence directly - Update PersistingMetricsReporterTest to use direct injection This allows MetricsPersistence to be a request-scoped bean created on demand, independent of the entity persistence backend. Persistence backends that support metrics storage can provide an alternative CDI producer. --- .../persistence/MetaStoreManagerFactory.java | 16 ---------------- .../service/config/ServiceProducers.java | 15 +++++++++++++++ .../reporting/PersistingMetricsReporter.java | 19 +++++-------------- .../PersistingMetricsReporterTest.java | 13 +------------ 4 files changed, 21 insertions(+), 42 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java index 142eee5233..4a32a88591 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/MetaStoreManagerFactory.java @@ -26,7 +26,6 @@ import org.apache.polaris.core.persistence.cache.EntityCache; import org.apache.polaris.core.persistence.dao.entity.BaseResult; import org.apache.polaris.core.persistence.dao.entity.PrincipalSecretsResult; -import org.apache.polaris.core.persistence.metrics.MetricsPersistence; /** Configuration interface for configuring the {@link PolarisMetaStoreManager}. */ public interface MetaStoreManagerFactory { @@ -37,21 +36,6 @@ public interface MetaStoreManagerFactory { EntityCache getOrCreateEntityCache(RealmContext realmContext, RealmConfig realmConfig); - /** - * Gets or creates a metrics persistence instance for the given realm context. - * - *

      The default implementation returns a no-op implementation that silently ignores write - * operations and returns empty pages for queries. Persistence backends that support metrics - * storage (e.g., JDBC with schema v4+) should override this method to provide a functional - * implementation. - * - * @param realmContext the realm context - * @return a MetricsPersistence implementation for the realm - */ - default MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext) { - return MetricsPersistence.NOOP; - } - Map bootstrapRealms( Iterable realms, RootCredentialsSet rootCredentialsSet); diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 121eb382c1..6e003ba951 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -47,6 +47,7 @@ import org.apache.polaris.core.persistence.BasePersistence; import org.apache.polaris.core.persistence.MetaStoreManagerFactory; import org.apache.polaris.core.persistence.PolarisMetaStoreManager; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; import org.apache.polaris.core.persistence.bootstrap.RootCredentialsSet; import org.apache.polaris.core.persistence.cache.EntityCache; import org.apache.polaris.core.persistence.resolver.ResolutionManifestFactory; @@ -224,6 +225,20 @@ public PolarisMetaStoreManager polarisMetaStoreManager( return metaStoreManagerFactory.getOrCreateMetaStoreManager(realmContext); } + /** + * Produces a {@link MetricsPersistence} bean for the current request. The default implementation + * returns a no-op instance. Persistence backends that support metrics storage (e.g., JDBC with + * metrics schema) should provide an alternative producer that returns a functional + * implementation. 
+ * + * @return a MetricsPersistence implementation for the current realm + */ + @Produces + @RequestScoped + public MetricsPersistence metricsPersistence() { + return MetricsPersistence.NOOP; + } + @Produces @RequestScoped public StorageCredentialsVendor storageCredentialsVendor( diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java index 7391ab9533..9ed8fa1c52 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -30,14 +30,12 @@ import org.apache.iceberg.metrics.MetricsReport; import org.apache.iceberg.metrics.ScanReport; import org.apache.polaris.core.context.CallContext; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.entity.PolarisBaseEntity; import org.apache.polaris.core.entity.PolarisEntity; import org.apache.polaris.core.entity.PolarisEntityCore; import org.apache.polaris.core.entity.PolarisEntitySubType; import org.apache.polaris.core.entity.PolarisEntityType; import org.apache.polaris.core.metrics.iceberg.MetricsRecordConverter; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; import org.apache.polaris.core.persistence.PolarisMetaStoreManager; import org.apache.polaris.core.persistence.dao.entity.EntityResult; import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; @@ -65,21 +63,18 @@ public class PersistingMetricsReporter implements PolarisMetricsReporter { private static final Logger LOGGER = LoggerFactory.getLogger(PersistingMetricsReporter.class); - private final RealmContext realmContext; private final CallContext callContext; private final PolarisMetaStoreManager metaStoreManager; - private final MetaStoreManagerFactory metaStoreManagerFactory; + private final 
MetricsPersistence metricsPersistence; @Inject public PersistingMetricsReporter( - RealmContext realmContext, CallContext callContext, PolarisMetaStoreManager metaStoreManager, - MetaStoreManagerFactory metaStoreManagerFactory) { - this.realmContext = realmContext; + MetricsPersistence metricsPersistence) { this.callContext = callContext; this.metaStoreManager = metaStoreManager; - this.metaStoreManagerFactory = metaStoreManagerFactory; + this.metricsPersistence = metricsPersistence; } @Override @@ -89,10 +84,6 @@ public void reportMetric( MetricsReport metricsReport, Instant receivedTimestamp) { - // Get the MetricsPersistence implementation for this realm - MetricsPersistence persistence = - metaStoreManagerFactory.getOrCreateMetricsPersistence(realmContext); - // Look up the catalog entity to get the catalog ID EntityResult catalogResult = metaStoreManager.readEntityByName( @@ -167,7 +158,7 @@ public void reportMetric( .namespace(namespace) .timestamp(receivedTimestamp) .build(); - persistence.writeScanReport(record); + metricsPersistence.writeScanReport(record); LOGGER.debug( "Persisted scan metrics for {}.{} (reportId={})", catalogName, table, record.reportId()); } else if (metricsReport instanceof CommitReport commitReport) { @@ -178,7 +169,7 @@ public void reportMetric( .namespace(namespace) .timestamp(receivedTimestamp) .build(); - persistence.writeCommitReport(record); + metricsPersistence.writeCommitReport(record); LOGGER.debug( "Persisted commit metrics for {}.{} (reportId={})", catalogName, diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java index c33dc7bfbc..c017f9284f 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java 
@@ -44,11 +44,9 @@ import org.apache.iceberg.metrics.ScanReport; import org.apache.polaris.core.PolarisCallContext; import org.apache.polaris.core.context.CallContext; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.entity.PolarisBaseEntity; import org.apache.polaris.core.entity.PolarisEntitySubType; import org.apache.polaris.core.entity.PolarisEntityType; -import org.apache.polaris.core.persistence.MetaStoreManagerFactory; import org.apache.polaris.core.persistence.PolarisMetaStoreManager; import org.apache.polaris.core.persistence.dao.entity.BaseResult; import org.apache.polaris.core.persistence.dao.entity.EntityResult; @@ -69,31 +67,22 @@ public class PersistingMetricsReporterTest { private static final TableIdentifier TABLE_IDENTIFIER = TableIdentifier.of(Namespace.of("db", "schema"), TABLE_NAME); - private RealmContext realmContext; private CallContext callContext; private PolarisCallContext polarisCallContext; private PolarisMetaStoreManager metaStoreManager; - private MetaStoreManagerFactory metaStoreManagerFactory; private MetricsPersistence metricsPersistence; private PersistingMetricsReporter reporter; @BeforeEach void setUp() { - realmContext = () -> "test-realm"; polarisCallContext = mock(PolarisCallContext.class); callContext = mock(CallContext.class); when(callContext.getPolarisCallContext()).thenReturn(polarisCallContext); metaStoreManager = mock(PolarisMetaStoreManager.class); - metaStoreManagerFactory = mock(MetaStoreManagerFactory.class); metricsPersistence = mock(MetricsPersistence.class); - when(metaStoreManagerFactory.getOrCreateMetricsPersistence(realmContext)) - .thenReturn(metricsPersistence); - - reporter = - new PersistingMetricsReporter( - realmContext, callContext, metaStoreManager, metaStoreManagerFactory); + reporter = new PersistingMetricsReporter(callContext, metaStoreManager, metricsPersistence); } @Test From 2f46564fdb6fa27e1104c259ea263114e650ca4b Mon Sep 17 00:00:00 2001 From: Anand Kumar 
Sankaran Date: Thu, 5 Feb 2026 20:18:01 -0800 Subject: [PATCH 56/67] test: Add unit tests for principal_role_ids in metrics report models - Updated ModelScanMetricsReportTest and ModelCommitMetricsReportTest - Added tests for principal_role_ids JSON parsing in fromResultSet - Added tests for principal_role_ids serialization in toMap (H2 and Postgres) - Added tests for the separate Converter classes used in query methods - Updated createTestReport() to include roles --- .../jdbc/JdbcBasePersistenceImpl.java | 188 ++---------------- .../jdbc/models/ModelCommitMetricsReport.java | 36 +++- .../ModelCommitMetricsReportConverter.java | 21 ++ .../jdbc/models/ModelScanMetricsReport.java | 36 +++- .../ModelScanMetricsReportConverter.java | 21 ++ .../jdbc/MetricsReportPersistenceTest.java | 14 +- .../models/ModelCommitMetricsReportTest.java | 65 ++++++ .../models/ModelScanMetricsReportTest.java | 65 ++++++ 8 files changed, 271 insertions(+), 175 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index a812412b5f..f5764fd143 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -355,32 +355,15 @@ public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { return; } try { - datasourceOperations.runWithinTransaction( - connection -> { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelScanMetricsReport.ALL_COLUMNS, - ModelScanMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream() - .toList()); - int updated = datasourceOperations.execute(connection, pq); - if (updated == 0) { - 
throw new SQLException("Scan metrics report was not inserted."); - } - - // Insert roles into junction table (filter out null/blank values) - for (String role : report.getRoles()) { - if (role != null && !role.isBlank()) { - PreparedQuery roleQuery = - QueryGenerator.generateInsertQueryWithoutRealmId( - List.of("realm_id", "report_id", "role_name"), - "SCAN_METRICS_REPORT_ROLES", - List.of(report.getRealmId(), report.getReportId(), role)); - datasourceOperations.execute(connection, roleQuery); - } - } - return true; - }); + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelScanMetricsReport.ALL_COLUMNS, + ModelScanMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Scan metrics report was not inserted."); + } } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); @@ -402,136 +385,21 @@ public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { return; } try { - datasourceOperations.runWithinTransaction( - connection -> { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelCommitMetricsReport.ALL_COLUMNS, - ModelCommitMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream() - .toList()); - int updated = datasourceOperations.execute(connection, pq); - if (updated == 0) { - throw new SQLException("Commit metrics report was not inserted."); - } - - // Insert roles into junction table (filter out null/blank values) - for (String role : report.getRoles()) { - if (role != null && !role.isBlank()) { - PreparedQuery roleQuery = - QueryGenerator.generateInsertQueryWithoutRealmId( - List.of("realm_id", "report_id", "role_name"), - "COMMIT_METRICS_REPORT_ROLES", - List.of(report.getRealmId(), report.getReportId(), 
role)); - datasourceOperations.execute(connection, roleQuery); - } - } - return true; - }); + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelCommitMetricsReport.ALL_COLUMNS, + ModelCommitMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Commit metrics report was not inserted."); + } } catch (SQLException e) { throw new RuntimeException( String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); } } - /** Simple converter to extract role_name from ResultSet. */ - private static final RoleNameConverter ROLE_NAME_CONVERTER = new RoleNameConverter(); - - /** Converter class that extracts just the role_name column from a ResultSet. */ - private static class RoleNameConverter implements Converter { - @Override - public String fromResultSet(java.sql.ResultSet rs) throws SQLException { - return rs.getString("role_name"); - } - - @Override - public Map toMap(DatabaseType databaseType) { - throw new UnsupportedOperationException("RoleNameConverter is read-only"); - } - } - - /** - * Loads roles from the scan_metrics_report_roles junction table for the given reports. - * - * @param reports the reports to populate with roles - * @return new list with roles populated - */ - private List loadScanMetricsReportRoles( - List reports) { - if (reports.isEmpty()) { - return reports; - } - try { - // Build a map of reportId -> Set roles - Map> rolesByReportId = new HashMap<>(); - for (ModelScanMetricsReport report : reports) { - String sql = - "SELECT role_name FROM " - + QueryGenerator.getFullyQualifiedTableName("SCAN_METRICS_REPORT_ROLES") - + " WHERE realm_id = ? 
AND report_id = ?"; - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, report.getReportId())); - List roles = datasourceOperations.executeSelect(query, ROLE_NAME_CONVERTER); - if (roles != null && !roles.isEmpty()) { - rolesByReportId.put(report.getReportId(), new HashSet<>(roles)); - } - } - - // Rebuild reports with roles populated - return reports.stream() - .map( - r -> - ImmutableModelScanMetricsReport.builder() - .from(r) - .roles(rolesByReportId.getOrDefault(r.getReportId(), Set.of())) - .build()) - .toList(); - } catch (SQLException e) { - LOGGER.warn("Failed to load roles for scan metrics reports: {}", e.getMessage(), e); - return reports; // Return reports without roles on error - } - } - - /** - * Loads roles from the commit_metrics_report_roles junction table for the given reports. - * - * @param reports the reports to populate with roles - * @return new list with roles populated - */ - private List loadCommitMetricsReportRoles( - List reports) { - if (reports.isEmpty()) { - return reports; - } - try { - // Build a map of reportId -> Set roles - Map> rolesByReportId = new HashMap<>(); - for (ModelCommitMetricsReport report : reports) { - String sql = - "SELECT role_name FROM " - + QueryGenerator.getFullyQualifiedTableName("COMMIT_METRICS_REPORT_ROLES") - + " WHERE realm_id = ? 
AND report_id = ?"; - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, report.getReportId())); - List roles = datasourceOperations.executeSelect(query, ROLE_NAME_CONVERTER); - if (roles != null && !roles.isEmpty()) { - rolesByReportId.put(report.getReportId(), new HashSet<>(roles)); - } - } - - // Rebuild reports with roles populated - return reports.stream() - .map( - r -> - ImmutableModelCommitMetricsReport.builder() - .from(r) - .roles(rolesByReportId.getOrDefault(r.getReportId(), Set.of())) - .build()) - .toList(); - } catch (SQLException e) { - LOGGER.warn("Failed to load roles for commit metrics reports: {}", e.getMessage(), e); - return reports; // Return reports without roles on error - } - } - /** * Retrieves scan metrics reports for a specific table within a time range. * @@ -600,10 +468,7 @@ public List queryScanMetricsReports( PreparedQuery query = new PreparedQuery(sql, values); var results = datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - if (results == null || results.isEmpty()) { - return Collections.emptyList(); - } - return loadScanMetricsReportRoles(results); + return results == null ? Collections.emptyList() : results; } catch (SQLException e) { throw new RuntimeException( String.format("Failed to query scan metrics reports due to %s", e.getMessage()), e); @@ -666,10 +531,7 @@ public List queryCommitMetricsReports( PreparedQuery query = new PreparedQuery(sql, values); var results = datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - if (results == null || results.isEmpty()) { - return Collections.emptyList(); - } - return loadCommitMetricsReportRoles(results); + return results == null ? 
Collections.emptyList() : results; } catch (SQLException e) { throw new RuntimeException( String.format("Failed to query commit metrics reports due to %s", e.getMessage()), e); @@ -699,10 +561,7 @@ public List queryScanMetricsReportsByTraceId(@Nonnull St PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); var results = datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - if (results == null || results.isEmpty()) { - return Collections.emptyList(); - } - return loadScanMetricsReportRoles(results); + return results == null ? Collections.emptyList() : results; } catch (SQLException e) { throw new RuntimeException( String.format( @@ -735,10 +594,7 @@ public List queryCommitMetricsReportsByTraceId( PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); var results = datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - if (results == null || results.isEmpty()) { - return Collections.emptyList(); - } - return loadCommitMetricsReportRoles(results); + return results == null ? 
Collections.emptyList() : results; } catch (SQLException e) { throw new RuntimeException( String.format( diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index ce17831c82..f4fed32b94 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -18,9 +18,13 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Nullable; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -34,6 +38,9 @@ public interface ModelCommitMetricsReport extends Converter { String TABLE_NAME = "COMMIT_METRICS_REPORT"; + /** ObjectMapper for JSON serialization/deserialization of roles. 
*/ + ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + // Column names String REPORT_ID = "report_id"; String REALM_ID = "realm_id"; @@ -67,6 +74,7 @@ public interface ModelCommitMetricsReport extends Converter ALL_COLUMNS = @@ -103,6 +111,7 @@ public interface ModelCommitMetricsReport extends Converter getRoles() { @@ -190,6 +199,18 @@ default Set getRoles() { @Override default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { + // Parse principal_role_ids JSON array + Set roles = Set.of(); + String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); + if (rolesJson != null && !rolesJson.isBlank()) { + try { + roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + } catch (JsonProcessingException e) { + // Log and continue with empty roles + roles = Set.of(); + } + } + return ImmutableModelCommitMetricsReport.builder() .reportId(rs.getString(REPORT_ID)) .realmId(rs.getString(REALM_ID)) @@ -223,6 +244,7 @@ default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException .totalFileSizeBytes(rs.getLong(TOTAL_FILE_SIZE_BYTES)) .totalDurationMs(rs.getLong(TOTAL_DURATION_MS)) .attempts(rs.getInt(ATTEMPTS)) + .roles(roles) .metadata(rs.getString(METADATA)) .build(); } @@ -262,9 +284,19 @@ default Map toMap(DatabaseType databaseType) { map.put(TOTAL_FILE_SIZE_BYTES, getTotalFileSizeBytes()); map.put(TOTAL_DURATION_MS, getTotalDurationMs()); map.put(ATTEMPTS, getAttempts()); + + // Serialize roles to JSON array + String rolesJson; + try { + rolesJson = OBJECT_MAPPER.writeValueAsString(getRoles()); + } catch (JsonProcessingException e) { + rolesJson = "[]"; + } if (databaseType.equals(DatabaseType.POSTGRES)) { + map.put(PRINCIPAL_ROLE_IDS, toJsonbPGobject(rolesJson)); map.put(METADATA, toJsonbPGobject(getMetadata() != null ? getMetadata() : "{}")); } else { + map.put(PRINCIPAL_ROLE_IDS, rolesJson); map.put(METADATA, getMetadata() != null ? 
getMetadata() : "{}"); } return map; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index b683edd0e3..824a0c6b45 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -18,9 +18,15 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; /** @@ -29,8 +35,22 @@ */ public class ModelCommitMetricsReportConverter implements Converter { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + @Override public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { + // Parse principal_role_ids JSON array + Set roles = Set.of(); + String rolesJson = rs.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); + if (rolesJson != null && !rolesJson.isBlank()) { + try { + roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + } catch (JsonProcessingException e) { + // Log and continue with empty roles + roles = Set.of(); + } + } + return ImmutableModelCommitMetricsReport.builder() .reportId(rs.getString(ModelCommitMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) @@ -67,6 +87,7 @@ public ModelCommitMetricsReport 
fromResultSet(ResultSet rs) throws SQLException .totalFileSizeBytes(rs.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)) .totalDurationMs(rs.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)) .attempts(rs.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)) + .roles(roles) .metadata(rs.getString(ModelCommitMetricsReport.METADATA)) .build(); } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index 012407ca86..f76625a9f7 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -18,9 +18,13 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Nullable; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -34,6 +38,9 @@ public interface ModelScanMetricsReport extends Converter { String TABLE_NAME = "SCAN_METRICS_REPORT"; + /** ObjectMapper for JSON serialization/deserialization of roles. 
*/ + ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + // Column names String REPORT_ID = "report_id"; String REALM_ID = "realm_id"; @@ -67,6 +74,7 @@ public interface ModelScanMetricsReport extends Converter ALL_COLUMNS = @@ -103,6 +111,7 @@ public interface ModelScanMetricsReport extends Converter getRoles() { @@ -194,6 +203,18 @@ default Set getRoles() { @Override default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { + // Parse principal_role_ids JSON array + Set roles = Set.of(); + String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); + if (rolesJson != null && !rolesJson.isBlank()) { + try { + roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + } catch (JsonProcessingException e) { + // Log and continue with empty roles + roles = Set.of(); + } + } + return ImmutableModelScanMetricsReport.builder() .reportId(rs.getString(REPORT_ID)) .realmId(rs.getString(REALM_ID)) @@ -227,6 +248,7 @@ default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { .positionalDeleteFiles(rs.getLong(POSITIONAL_DELETE_FILES)) .indexedDeleteFiles(rs.getLong(INDEXED_DELETE_FILES)) .totalDeleteFileSizeBytes(rs.getLong(TOTAL_DELETE_FILE_SIZE_BYTES)) + .roles(roles) .metadata(rs.getString(METADATA)) .build(); } @@ -266,9 +288,19 @@ default Map toMap(DatabaseType databaseType) { map.put(POSITIONAL_DELETE_FILES, getPositionalDeleteFiles()); map.put(INDEXED_DELETE_FILES, getIndexedDeleteFiles()); map.put(TOTAL_DELETE_FILE_SIZE_BYTES, getTotalDeleteFileSizeBytes()); + + // Serialize roles to JSON array + String rolesJson; + try { + rolesJson = OBJECT_MAPPER.writeValueAsString(getRoles()); + } catch (JsonProcessingException e) { + rolesJson = "[]"; + } if (databaseType.equals(DatabaseType.POSTGRES)) { + map.put(PRINCIPAL_ROLE_IDS, toJsonbPGobject(rolesJson)); map.put(METADATA, toJsonbPGobject(getMetadata() != null ? 
getMetadata() : "{}")); } else { + map.put(PRINCIPAL_ROLE_IDS, rolesJson); map.put(METADATA, getMetadata() != null ? getMetadata() : "{}"); } return map; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index 1abbc0389c..9341ed1cfa 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -18,9 +18,15 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; /** @@ -29,8 +35,22 @@ */ public class ModelScanMetricsReportConverter implements Converter { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + @Override public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { + // Parse principal_role_ids JSON array + Set roles = Set.of(); + String rolesJson = rs.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); + if (rolesJson != null && !rolesJson.isBlank()) { + try { + roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + } catch (JsonProcessingException e) { + // Log and continue with empty roles + roles = Set.of(); + } + } + return ImmutableModelScanMetricsReport.builder() .reportId(rs.getString(ModelScanMetricsReport.REPORT_ID)) 
.realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) @@ -64,6 +84,7 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { .positionalDeleteFiles(rs.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)) .indexedDeleteFiles(rs.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)) .totalDeleteFileSizeBytes(rs.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) + .roles(roles) .metadata(rs.getString(ModelScanMetricsReport.METADATA)) .build(); } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 70e63a5b82..d77be512d7 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -57,10 +57,14 @@ void setUp() throws SQLException { datasourceOperations = new DatasourceOperations(dataSource, new TestJdbcConfiguration()); - // Execute schema v4 which includes metrics tables + // Execute schema v4 for entity tables ClassLoader classLoader = DatasourceOperations.class.getClassLoader(); - InputStream scriptStream = classLoader.getResourceAsStream("h2/schema-v4.sql"); - datasourceOperations.executeScript(scriptStream); + InputStream schemaStream = classLoader.getResourceAsStream("h2/schema-v4.sql"); + datasourceOperations.executeScript(schemaStream); + + // Execute metrics schema v1 for metrics tables + InputStream metricsSchemaStream = classLoader.getResourceAsStream("h2/schema-metrics-v1.sql"); + datasourceOperations.executeScript(metricsSchemaStream); PolarisDiagnostics diagServices = new PolarisDefaultDiagServiceImpl(); RealmContext realmContext = () -> "TEST_REALM"; @@ -664,7 +668,7 @@ void 
testWriteScanMetricsReportWithRoles() { .roles(Set.of("admin", "data_engineer", "analyst")) .build(); - // Should not throw - roles are written to junction table + // Should not throw - roles are serialized as JSON array in principal_role_ids column persistence.writeScanMetricsReport(report); } @@ -707,7 +711,7 @@ void testWriteCommitMetricsReportWithRoles() { .roles(Set.of("admin", "data_engineer")) .build(); - // Should not throw - roles are written to junction table + // Should not throw - roles are serialized as JSON array in principal_role_ids column persistence.writeCommitMetricsReport(report); } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java index e27bce6660..6c31d5ebb9 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -25,6 +26,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; +import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; import org.junit.jupiter.api.Test; import org.postgresql.util.PGobject; @@ -63,6 +65,8 @@ public class ModelCommitMetricsReportTest { private static final long TEST_TOTAL_FILE_SIZE = 10240000L; private static final long TEST_TOTAL_DURATION = 250L; private static final int TEST_ATTEMPTS = 1; + private static final Set TEST_ROLES = 
Set.of("admin", "data_engineer", "analyst"); + private static final String TEST_ROLES_JSON = "[\"admin\",\"data_engineer\",\"analyst\"]"; private static final String TEST_METADATA = "{\"commit\":\"info\"}"; @Test @@ -123,6 +127,8 @@ public void testFromResultSet() throws SQLException { when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DURATION_MS)) .thenReturn(TEST_TOTAL_DURATION); when(mockResultSet.getInt(ModelCommitMetricsReport.ATTEMPTS)).thenReturn(TEST_ATTEMPTS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)) + .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelCommitMetricsReport result = @@ -140,6 +146,7 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_ADDED_RECORDS, result.getAddedRecords()); assertEquals(TEST_TOTAL_DURATION, result.getTotalDurationMs()); assertEquals(TEST_ATTEMPTS, result.getAttempts()); + assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -154,6 +161,9 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_SNAPSHOT_ID, resultMap.get(ModelCommitMetricsReport.SNAPSHOT_ID)); assertEquals(TEST_OPERATION, resultMap.get(ModelCommitMetricsReport.OPERATION)); assertEquals(TEST_ADDED_DATA_FILES, resultMap.get(ModelCommitMetricsReport.ADDED_DATA_FILES)); + // principal_role_ids should be serialized as a JSON string for H2 + String rolesJson = (String) resultMap.get(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); + assertThat(rolesJson).contains("admin", "data_engineer", "analyst"); assertEquals(TEST_METADATA, resultMap.get(ModelCommitMetricsReport.METADATA)); } @@ -164,11 +174,65 @@ public void testToMapWithPostgresType() { Map resultMap = report.toMap(DatabaseType.POSTGRES); assertEquals(TEST_REPORT_ID, resultMap.get(ModelCommitMetricsReport.REPORT_ID)); + // principal_role_ids should be serialized as a PGobject with 
type "jsonb" for Postgres + PGobject rolesPgObject = (PGobject) resultMap.get(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); + assertEquals("jsonb", rolesPgObject.getType()); + assertThat(rolesPgObject.getValue()).contains("admin", "data_engineer", "analyst"); PGobject pgObject = (PGobject) resultMap.get(ModelCommitMetricsReport.METADATA); assertEquals("jsonb", pgObject.getType()); assertEquals(TEST_METADATA, pgObject.getValue()); } + @Test + public void testConverterFromResultSet() throws SQLException { + // Test the separate ModelCommitMetricsReportConverter class (used in query methods) + ModelCommitMetricsReportConverter converter = new ModelCommitMetricsReportConverter(); + + ResultSet mockResultSet = mock(ResultSet.class); + when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); + when(mockResultSet.getLong(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); + when(mockResultSet.getLong(ModelCommitMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); + when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); + when(mockResultSet.getString(ModelCommitMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_TRACE_ID)).thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_SPAN_ID)).thenReturn(TEST_OTEL_SPAN_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_TRACE_ID)).thenReturn(TEST_REPORT_TRACE_ID); + when(mockResultSet.getObject(ModelCommitMetricsReport.SNAPSHOT_ID, Long.class)).thenReturn(TEST_SNAPSHOT_ID); + 
when(mockResultSet.getObject(ModelCommitMetricsReport.SEQUENCE_NUMBER, Long.class)).thenReturn(TEST_SEQUENCE_NUMBER); + when(mockResultSet.getString(ModelCommitMetricsReport.OPERATION)).thenReturn(TEST_OPERATION); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DATA_FILES)).thenReturn(TEST_ADDED_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DATA_FILES)).thenReturn(TEST_REMOVED_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DATA_FILES)).thenReturn(TEST_TOTAL_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DELETE_FILES)).thenReturn(TEST_ADDED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DELETE_FILES)).thenReturn(TEST_REMOVED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DELETE_FILES)).thenReturn(TEST_TOTAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_EQUALITY_DELETE_FILES)).thenReturn(TEST_ADDED_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_EQUALITY_DELETE_FILES)).thenReturn(TEST_REMOVED_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_POSITIONAL_DELETE_FILES)).thenReturn(TEST_ADDED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_POSITIONAL_DELETE_FILES)).thenReturn(TEST_REMOVED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_RECORDS)).thenReturn(TEST_ADDED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_RECORDS)).thenReturn(TEST_REMOVED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_RECORDS)).thenReturn(TEST_TOTAL_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_FILE_SIZE_BYTES)).thenReturn(TEST_ADDED_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_FILE_SIZE_BYTES)).thenReturn(TEST_REMOVED_FILE_SIZE); + 
when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)).thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)).thenReturn(TEST_TOTAL_DURATION); + when(mockResultSet.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)).thenReturn(TEST_ATTEMPTS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)).thenReturn(TEST_ROLES_JSON); + when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); + + ModelCommitMetricsReport result = converter.fromResultSet(mockResultSet); + + assertEquals(TEST_REPORT_ID, result.getReportId()); + assertEquals(TEST_REALM_ID, result.getRealmId()); + assertEquals(TEST_CATALOG_ID, result.getCatalogId()); + assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); + assertEquals(TEST_METADATA, result.getMetadata()); + } + private ModelCommitMetricsReport createTestReport() { return ImmutableModelCommitMetricsReport.builder() .reportId(TEST_REPORT_ID) @@ -201,6 +265,7 @@ private ModelCommitMetricsReport createTestReport() { .totalFileSizeBytes(TEST_TOTAL_FILE_SIZE) .totalDurationMs(TEST_TOTAL_DURATION) .attempts(TEST_ATTEMPTS) + .roles(TEST_ROLES) .metadata(TEST_METADATA) .build(); } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java index bc0200886c..7e6b054142 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; +import static 
org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -25,6 +26,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; +import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; import org.junit.jupiter.api.Test; import org.postgresql.util.PGobject; @@ -63,6 +65,8 @@ public class ModelScanMetricsReportTest { private static final long TEST_POSITIONAL_DELETE_FILES = 1L; private static final long TEST_INDEXED_DELETE_FILES = 0L; private static final long TEST_DELETE_FILE_SIZE = 2048L; + private static final Set TEST_ROLES = Set.of("admin", "data_engineer", "analyst"); + private static final String TEST_ROLES_JSON = "[\"admin\",\"data_engineer\",\"analyst\"]"; private static final String TEST_METADATA = "{\"custom\":\"value\"}"; @Test @@ -123,6 +127,8 @@ public void testFromResultSet() throws SQLException { .thenReturn(TEST_INDEXED_DELETE_FILES); when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) .thenReturn(TEST_DELETE_FILE_SIZE); + when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)) + .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelScanMetricsReport result = ModelScanMetricsReport.CONVERTER.fromResultSet(mockResultSet); @@ -140,6 +146,7 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_RESULT_DATA_FILES, result.getResultDataFiles()); assertEquals(TEST_TOTAL_FILE_SIZE, result.getTotalFileSizeBytes()); assertEquals(TEST_PLANNING_DURATION, result.getTotalPlanningDurationMs()); + assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -156,6 +163,9 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_TABLE_ID, 
resultMap.get(ModelScanMetricsReport.TABLE_ID_COL)); assertEquals(TEST_TIMESTAMP_MS, resultMap.get(ModelScanMetricsReport.TIMESTAMP_MS)); assertEquals(TEST_RESULT_DATA_FILES, resultMap.get(ModelScanMetricsReport.RESULT_DATA_FILES)); + // principal_role_ids should be serialized as a JSON string for H2 + String rolesJson = (String) resultMap.get(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); + assertThat(rolesJson).contains("admin", "data_engineer", "analyst"); assertEquals(TEST_METADATA, resultMap.get(ModelScanMetricsReport.METADATA)); } @@ -166,11 +176,65 @@ public void testToMapWithPostgresType() { Map resultMap = report.toMap(DatabaseType.POSTGRES); assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); + // principal_role_ids should be serialized as a PGobject with type "jsonb" for Postgres + PGobject rolesPgObject = (PGobject) resultMap.get(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); + assertEquals("jsonb", rolesPgObject.getType()); + assertThat(rolesPgObject.getValue()).contains("admin", "data_engineer", "analyst"); PGobject pgObject = (PGobject) resultMap.get(ModelScanMetricsReport.METADATA); assertEquals("jsonb", pgObject.getType()); assertEquals(TEST_METADATA, pgObject.getValue()); } + @Test + public void testConverterFromResultSet() throws SQLException { + // Test the separate ModelScanMetricsReportConverter class (used in query methods) + ModelScanMetricsReportConverter converter = new ModelScanMetricsReportConverter(); + + ResultSet mockResultSet = mock(ResultSet.class); + when(mockResultSet.getString(ModelScanMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); + when(mockResultSet.getString(ModelScanMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); + when(mockResultSet.getLong(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); + when(mockResultSet.getString(ModelScanMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); + when(mockResultSet.getLong(ModelScanMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); + 
when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); + when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); + when(mockResultSet.getString(ModelScanMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_TRACE_ID)).thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_SPAN_ID)).thenReturn(TEST_OTEL_SPAN_ID); + when(mockResultSet.getString(ModelScanMetricsReport.REPORT_TRACE_ID)).thenReturn(TEST_REPORT_TRACE_ID); + when(mockResultSet.getObject(ModelScanMetricsReport.SNAPSHOT_ID, Long.class)).thenReturn(TEST_SNAPSHOT_ID); + when(mockResultSet.getObject(ModelScanMetricsReport.SCHEMA_ID, Integer.class)).thenReturn(TEST_SCHEMA_ID); + when(mockResultSet.getString(ModelScanMetricsReport.FILTER_EXPRESSION)).thenReturn(TEST_FILTER); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_IDS)).thenReturn(TEST_PROJECTED_IDS); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_NAMES)).thenReturn(TEST_PROJECTED_NAMES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DATA_FILES)).thenReturn(TEST_RESULT_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DELETE_FILES)).thenReturn(TEST_RESULT_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_FILE_SIZE_BYTES)).thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DATA_MANIFESTS)).thenReturn(TEST_TOTAL_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_MANIFESTS)).thenReturn(TEST_TOTAL_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DATA_MANIFESTS)).thenReturn(TEST_SCANNED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DELETE_MANIFESTS)).thenReturn(TEST_SCANNED_DELETE_MANIFESTS); + 
when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_MANIFESTS)).thenReturn(TEST_SKIPPED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_MANIFESTS)).thenReturn(TEST_SKIPPED_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_FILES)).thenReturn(TEST_SKIPPED_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_FILES)).thenReturn(TEST_SKIPPED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_PLANNING_DURATION_MS)).thenReturn(TEST_PLANNING_DURATION); + when(mockResultSet.getLong(ModelScanMetricsReport.EQUALITY_DELETE_FILES)).thenReturn(TEST_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)).thenReturn(TEST_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)).thenReturn(TEST_INDEXED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)).thenReturn(TEST_DELETE_FILE_SIZE); + when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)).thenReturn(TEST_ROLES_JSON); + when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); + + ModelScanMetricsReport result = converter.fromResultSet(mockResultSet); + + assertEquals(TEST_REPORT_ID, result.getReportId()); + assertEquals(TEST_REALM_ID, result.getRealmId()); + assertEquals(TEST_CATALOG_ID, result.getCatalogId()); + assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); + assertEquals(TEST_METADATA, result.getMetadata()); + } + private ModelScanMetricsReport createTestReport() { return ImmutableModelScanMetricsReport.builder() .reportId(TEST_REPORT_ID) @@ -199,6 +263,7 @@ private ModelScanMetricsReport createTestReport() { .positionalDeleteFiles(TEST_POSITIONAL_DELETE_FILES) .indexedDeleteFiles(TEST_INDEXED_DELETE_FILES) .totalDeleteFileSizeBytes(TEST_DELETE_FILE_SIZE) + .roles(TEST_ROLES) 
.metadata(TEST_METADATA) .build(); } From 4dece9e4ba5ff05fab7c96eb7cc9e148c46c104c Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 20:37:25 -0800 Subject: [PATCH 57/67] style: Apply spotless formatting --- .../jdbc/JdbcBasePersistenceImpl.java | 5 -- .../jdbc/models/ModelCommitMetricsReport.java | 3 +- .../ModelCommitMetricsReportConverter.java | 3 +- .../jdbc/models/ModelScanMetricsReport.java | 3 +- .../ModelScanMetricsReportConverter.java | 3 +- .../models/ModelCommitMetricsReportTest.java | 78 ++++++++++++------- .../models/ModelScanMetricsReportTest.java | 72 +++++++++++------ .../service/config/ServiceProducers.java | 2 +- 8 files changed, 109 insertions(+), 60 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index f5764fd143..cb8dce09d0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -28,13 +28,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; @@ -70,10 +68,7 @@ import org.apache.polaris.core.storage.PolarisStorageIntegration; import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.apache.polaris.core.storage.StorageLocation; -import org.apache.polaris.persistence.relational.jdbc.models.Converter; import 
org.apache.polaris.persistence.relational.jdbc.models.EntityNameLookupRecordConverter; -import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index f4fed32b94..2a90411584 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -204,7 +204,8 @@ default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); if (rolesJson != null && !rolesJson.isBlank()) { try { - roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + roles = + new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); } catch (JsonProcessingException e) { // Log and continue with empty roles roles = Set.of(); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index 824a0c6b45..05e0556360 100644 --- 
a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -44,7 +44,8 @@ public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException String rolesJson = rs.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); if (rolesJson != null && !rolesJson.isBlank()) { try { - roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + roles = + new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); } catch (JsonProcessingException e) { // Log and continue with empty roles roles = Set.of(); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index f76625a9f7..5d99e15507 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -208,7 +208,8 @@ default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); if (rolesJson != null && !rolesJson.isBlank()) { try { - roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + roles = + new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); } catch (JsonProcessingException e) { // Log and continue with empty roles roles = Set.of(); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java 
b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index 9341ed1cfa..bea27ee381 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -44,7 +44,8 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { String rolesJson = rs.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); if (rolesJson != null && !rolesJson.isBlank()) { try { - roles = new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); + roles = + new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); } catch (JsonProcessingException e) { // Log and continue with empty roles roles = Set.of(); diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java index 6c31d5ebb9..0ad9a412c6 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -194,34 +194,60 @@ public void testConverterFromResultSet() throws SQLException { when(mockResultSet.getLong(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); when(mockResultSet.getLong(ModelCommitMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); - when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); - 
when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); + when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) + .thenReturn(TEST_TIMESTAMP_MS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) + .thenReturn(TEST_PRINCIPAL); when(mockResultSet.getString(ModelCommitMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_TRACE_ID)).thenReturn(TEST_OTEL_TRACE_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_SPAN_ID)).thenReturn(TEST_OTEL_SPAN_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_TRACE_ID)).thenReturn(TEST_REPORT_TRACE_ID); - when(mockResultSet.getObject(ModelCommitMetricsReport.SNAPSHOT_ID, Long.class)).thenReturn(TEST_SNAPSHOT_ID); - when(mockResultSet.getObject(ModelCommitMetricsReport.SEQUENCE_NUMBER, Long.class)).thenReturn(TEST_SEQUENCE_NUMBER); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_TRACE_ID)) + .thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.OTEL_SPAN_ID)) + .thenReturn(TEST_OTEL_SPAN_ID); + when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_TRACE_ID)) + .thenReturn(TEST_REPORT_TRACE_ID); + when(mockResultSet.getObject(ModelCommitMetricsReport.SNAPSHOT_ID, Long.class)) + .thenReturn(TEST_SNAPSHOT_ID); + when(mockResultSet.getObject(ModelCommitMetricsReport.SEQUENCE_NUMBER, Long.class)) + .thenReturn(TEST_SEQUENCE_NUMBER); when(mockResultSet.getString(ModelCommitMetricsReport.OPERATION)).thenReturn(TEST_OPERATION); - when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DATA_FILES)).thenReturn(TEST_ADDED_DATA_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DATA_FILES)).thenReturn(TEST_REMOVED_DATA_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DATA_FILES)).thenReturn(TEST_TOTAL_DATA_FILES); - 
when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DELETE_FILES)).thenReturn(TEST_ADDED_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DELETE_FILES)).thenReturn(TEST_REMOVED_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DELETE_FILES)).thenReturn(TEST_TOTAL_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_EQUALITY_DELETE_FILES)).thenReturn(TEST_ADDED_EQUALITY_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_EQUALITY_DELETE_FILES)).thenReturn(TEST_REMOVED_EQUALITY_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_POSITIONAL_DELETE_FILES)).thenReturn(TEST_ADDED_POSITIONAL_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_POSITIONAL_DELETE_FILES)).thenReturn(TEST_REMOVED_POSITIONAL_DELETE_FILES); - when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_RECORDS)).thenReturn(TEST_ADDED_RECORDS); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_RECORDS)).thenReturn(TEST_REMOVED_RECORDS); - when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_RECORDS)).thenReturn(TEST_TOTAL_RECORDS); - when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_FILE_SIZE_BYTES)).thenReturn(TEST_ADDED_FILE_SIZE); - when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_FILE_SIZE_BYTES)).thenReturn(TEST_REMOVED_FILE_SIZE); - when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)).thenReturn(TEST_TOTAL_FILE_SIZE); - when(mockResultSet.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)).thenReturn(TEST_TOTAL_DURATION); - when(mockResultSet.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)).thenReturn(TEST_ATTEMPTS); - when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)).thenReturn(TEST_ROLES_JSON); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DATA_FILES)) + .thenReturn(TEST_ADDED_DATA_FILES); + 
when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DATA_FILES)) + .thenReturn(TEST_REMOVED_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DATA_FILES)) + .thenReturn(TEST_TOTAL_DATA_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_DELETE_FILES)) + .thenReturn(TEST_ADDED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_DELETE_FILES)) + .thenReturn(TEST_REMOVED_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DELETE_FILES)) + .thenReturn(TEST_TOTAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_EQUALITY_DELETE_FILES)) + .thenReturn(TEST_ADDED_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_EQUALITY_DELETE_FILES)) + .thenReturn(TEST_REMOVED_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_ADDED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_REMOVED_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_RECORDS)) + .thenReturn(TEST_ADDED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_RECORDS)) + .thenReturn(TEST_REMOVED_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_RECORDS)) + .thenReturn(TEST_TOTAL_RECORDS); + when(mockResultSet.getLong(ModelCommitMetricsReport.ADDED_FILE_SIZE_BYTES)) + .thenReturn(TEST_ADDED_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.REMOVED_FILE_SIZE_BYTES)) + .thenReturn(TEST_REMOVED_FILE_SIZE); + when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)) + .thenReturn(TEST_TOTAL_DURATION); + when(mockResultSet.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)) + 
.thenReturn(TEST_ATTEMPTS); + when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)) + .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelCommitMetricsReport result = converter.fromResultSet(mockResultSet); diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java index 7e6b054142..5edb05b7b9 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -199,31 +199,55 @@ public void testConverterFromResultSet() throws SQLException { when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); when(mockResultSet.getString(ModelScanMetricsReport.REQUEST_ID)).thenReturn(TEST_REQUEST_ID); - when(mockResultSet.getString(ModelScanMetricsReport.OTEL_TRACE_ID)).thenReturn(TEST_OTEL_TRACE_ID); - when(mockResultSet.getString(ModelScanMetricsReport.OTEL_SPAN_ID)).thenReturn(TEST_OTEL_SPAN_ID); - when(mockResultSet.getString(ModelScanMetricsReport.REPORT_TRACE_ID)).thenReturn(TEST_REPORT_TRACE_ID); - when(mockResultSet.getObject(ModelScanMetricsReport.SNAPSHOT_ID, Long.class)).thenReturn(TEST_SNAPSHOT_ID); - when(mockResultSet.getObject(ModelScanMetricsReport.SCHEMA_ID, Integer.class)).thenReturn(TEST_SCHEMA_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_TRACE_ID)) + .thenReturn(TEST_OTEL_TRACE_ID); + when(mockResultSet.getString(ModelScanMetricsReport.OTEL_SPAN_ID)) + .thenReturn(TEST_OTEL_SPAN_ID); + 
when(mockResultSet.getString(ModelScanMetricsReport.REPORT_TRACE_ID)) + .thenReturn(TEST_REPORT_TRACE_ID); + when(mockResultSet.getObject(ModelScanMetricsReport.SNAPSHOT_ID, Long.class)) + .thenReturn(TEST_SNAPSHOT_ID); + when(mockResultSet.getObject(ModelScanMetricsReport.SCHEMA_ID, Integer.class)) + .thenReturn(TEST_SCHEMA_ID); when(mockResultSet.getString(ModelScanMetricsReport.FILTER_EXPRESSION)).thenReturn(TEST_FILTER); - when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_IDS)).thenReturn(TEST_PROJECTED_IDS); - when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_NAMES)).thenReturn(TEST_PROJECTED_NAMES); - when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DATA_FILES)).thenReturn(TEST_RESULT_DATA_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DELETE_FILES)).thenReturn(TEST_RESULT_DELETE_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_FILE_SIZE_BYTES)).thenReturn(TEST_TOTAL_FILE_SIZE); - when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DATA_MANIFESTS)).thenReturn(TEST_TOTAL_DATA_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_MANIFESTS)).thenReturn(TEST_TOTAL_DELETE_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DATA_MANIFESTS)).thenReturn(TEST_SCANNED_DATA_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DELETE_MANIFESTS)).thenReturn(TEST_SCANNED_DELETE_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_MANIFESTS)).thenReturn(TEST_SKIPPED_DATA_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_MANIFESTS)).thenReturn(TEST_SKIPPED_DELETE_MANIFESTS); - when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_FILES)).thenReturn(TEST_SKIPPED_DATA_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_FILES)).thenReturn(TEST_SKIPPED_DELETE_FILES); - 
when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_PLANNING_DURATION_MS)).thenReturn(TEST_PLANNING_DURATION); - when(mockResultSet.getLong(ModelScanMetricsReport.EQUALITY_DELETE_FILES)).thenReturn(TEST_EQUALITY_DELETE_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)).thenReturn(TEST_POSITIONAL_DELETE_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)).thenReturn(TEST_INDEXED_DELETE_FILES); - when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)).thenReturn(TEST_DELETE_FILE_SIZE); - when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)).thenReturn(TEST_ROLES_JSON); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_IDS)) + .thenReturn(TEST_PROJECTED_IDS); + when(mockResultSet.getString(ModelScanMetricsReport.PROJECTED_FIELD_NAMES)) + .thenReturn(TEST_PROJECTED_NAMES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DATA_FILES)) + .thenReturn(TEST_RESULT_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.RESULT_DELETE_FILES)) + .thenReturn(TEST_RESULT_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_FILE_SIZE_BYTES)) + .thenReturn(TEST_TOTAL_FILE_SIZE); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DATA_MANIFESTS)) + .thenReturn(TEST_TOTAL_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_MANIFESTS)) + .thenReturn(TEST_TOTAL_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DATA_MANIFESTS)) + .thenReturn(TEST_SCANNED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SCANNED_DELETE_MANIFESTS)) + .thenReturn(TEST_SCANNED_DELETE_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_MANIFESTS)) + .thenReturn(TEST_SKIPPED_DATA_MANIFESTS); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_MANIFESTS)) + .thenReturn(TEST_SKIPPED_DELETE_MANIFESTS); + 
when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DATA_FILES)) + .thenReturn(TEST_SKIPPED_DATA_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.SKIPPED_DELETE_FILES)) + .thenReturn(TEST_SKIPPED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_PLANNING_DURATION_MS)) + .thenReturn(TEST_PLANNING_DURATION); + when(mockResultSet.getLong(ModelScanMetricsReport.EQUALITY_DELETE_FILES)) + .thenReturn(TEST_EQUALITY_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)) + .thenReturn(TEST_POSITIONAL_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)) + .thenReturn(TEST_INDEXED_DELETE_FILES); + when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) + .thenReturn(TEST_DELETE_FILE_SIZE); + when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)) + .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelScanMetricsReport result = converter.fromResultSet(mockResultSet); diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 6e003ba951..62e7a3aedc 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -47,9 +47,9 @@ import org.apache.polaris.core.persistence.BasePersistence; import org.apache.polaris.core.persistence.MetaStoreManagerFactory; import org.apache.polaris.core.persistence.PolarisMetaStoreManager; -import org.apache.polaris.core.persistence.metrics.MetricsPersistence; import org.apache.polaris.core.persistence.bootstrap.RootCredentialsSet; import org.apache.polaris.core.persistence.cache.EntityCache; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; 
import org.apache.polaris.core.persistence.resolver.ResolutionManifestFactory; import org.apache.polaris.core.persistence.resolver.ResolutionManifestFactoryImpl; import org.apache.polaris.core.persistence.resolver.Resolver; From 68fed869268d82312dd3d08a675ed4d6fb78ef4b Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 06:33:17 -0800 Subject: [PATCH 58/67] test: Add tests for --include-metrics bootstrap option - Added unit tests for JdbcBootstrapUtils.shouldIncludeMetrics() method: - Test when schemaOptions is null (returns false) - Test when includeMetrics is true (returns true) - Test when includeMetrics is false (returns false) - Added integration tests in RelationalJdbcBootstrapCommandTest: - testBootstrapWithIncludeMetrics: verifies --include-metrics flag works - testBootstrapWithoutIncludeMetrics: verifies default behavior Per PR review comment from singhpk234 --- .../jdbc/JdbcBootstrapUtilsTest.java | 31 +++++++++++++++++++ .../RelationalJdbcBootstrapCommandTest.java | 23 ++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtilsTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtilsTest.java index 6a9eb95524..f36ab68488 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtilsTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/JdbcBootstrapUtilsTest.java @@ -150,4 +150,35 @@ void whenSchemaOptionsIsNull_shouldReturnDefault() { assertEquals(-1, result); } } + + @Nested + @ExtendWith(MockitoExtension.class) + class ShouldIncludeMetricsTests { + + @Mock private BootstrapOptions mockBootstrapOptions; + @Mock private SchemaOptions mockSchemaOptions; + + @Test + void whenSchemaOptionsIsNull_shouldReturnFalse() { + 
when(mockBootstrapOptions.schemaOptions()).thenReturn(null); + boolean result = JdbcBootstrapUtils.shouldIncludeMetrics(mockBootstrapOptions); + assertEquals(false, result); + } + + @Test + void whenIncludeMetricsIsTrue_shouldReturnTrue() { + when(mockBootstrapOptions.schemaOptions()).thenReturn(mockSchemaOptions); + when(mockSchemaOptions.includeMetrics()).thenReturn(true); + boolean result = JdbcBootstrapUtils.shouldIncludeMetrics(mockBootstrapOptions); + assertEquals(true, result); + } + + @Test + void whenIncludeMetricsIsFalse_shouldReturnFalse() { + when(mockBootstrapOptions.schemaOptions()).thenReturn(mockSchemaOptions); + when(mockSchemaOptions.includeMetrics()).thenReturn(false); + boolean result = JdbcBootstrapUtils.shouldIncludeMetrics(mockBootstrapOptions); + assertEquals(false, result); + } + } } diff --git a/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapCommandTest.java b/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapCommandTest.java index 31f3a9eea0..73abd2cbdf 100644 --- a/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapCommandTest.java +++ b/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapCommandTest.java @@ -44,4 +44,27 @@ public void testBootstrapFailsWhenAddingRealmWithDifferentSchemaVersion( // assertThat(result2.exitCode()).isEqualTo(EXIT_CODE_BOOTSTRAP_ERROR); // assertThat(result2.getOutput()).contains("Cannot bootstrap due to schema version mismatch."); } + + @Test + public void testBootstrapWithIncludeMetrics(QuarkusMainLauncher launcher) { + // Test that --include-metrics option is accepted and bootstrap completes successfully. + // The metrics tables are created during bootstrap when this flag is set. 
+ LaunchResult result = + launcher.launch( + "bootstrap", "-r", "realm1", "-c", "realm1,root,s3cr3t", "--include-metrics"); + assertThat(result.exitCode()).isEqualTo(0); + assertThat(result.getOutput()) + .contains("Realm 'realm1' successfully bootstrapped.") + .contains("Bootstrap completed successfully."); + } + + @Test + public void testBootstrapWithoutIncludeMetrics(QuarkusMainLauncher launcher) { + // Test that bootstrap works without --include-metrics (default behavior) + LaunchResult result = launcher.launch("bootstrap", "-r", "realm1", "-c", "realm1,root,s3cr3t"); + assertThat(result.exitCode()).isEqualTo(0); + assertThat(result.getOutput()) + .contains("Realm 'realm1' successfully bootstrapped.") + .contains("Bootstrap completed successfully."); + } } From eabdd15ee274ed7ca3b95ef00f833e187d6d8376 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 09:36:37 -0800 Subject: [PATCH 59/67] refactor: Remove schema-v4 version fixes (moved to separate PR) The schema-v4 version number fixes have been extracted to a separate branch (fix-schema-v4-version-number) for an independent PR. This reverts the version changes back to match main branch. 
--- .../polaris/persistence/relational/jdbc/DatabaseType.java | 2 +- persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql | 2 +- .../relational-jdbc/src/main/resources/postgres/schema-v4.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java index 28b77ad8c2..b29b5732a9 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java @@ -50,7 +50,7 @@ public static DatabaseType fromDisplayName(String displayName) { */ public InputStream openInitScriptResource(int schemaVersion) { // Preconditions check is simpler and more direct than a switch default - if (schemaVersion <= 0 || schemaVersion > 4) { + if (schemaVersion <= 0 || schemaVersion > 3) { throw new IllegalArgumentException("Unknown or invalid schema version " + schemaVersion); } diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql index 1e766a6969..0f2ac75cc6 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-v4.sql @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS version ( MERGE INTO version (version_key, version_value) KEY (version_key) - VALUES ('version', 4); + VALUES ('version', 3); -- H2 supports COMMENT, but some modes may ignore it COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql index b1a5b5870b..47d4ea8834 100644 --- 
a/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-v4.sql @@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS version ( version_value INTEGER NOT NULL ); INSERT INTO version (version_key, version_value) -VALUES ('version', 4) +VALUES ('version', 3) ON CONFLICT (version_key) DO UPDATE SET version_value = EXCLUDED.version_value; COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; From 86789d71b4cde12dc28073809586e562c81d64ff Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 09:44:56 -0800 Subject: [PATCH 60/67] refactor: Remove principal_role_ids from metrics schema Per PR review feedback from dimas-b: Polaris supports external IdP and PDP (e.g. Keycloak and OPA), and the roles stored in metrics tables may not be aligned with AuthZ decisions. Removed principal_role_ids column from both scan_metrics_report and commit_metrics_report tables in H2 and PostgreSQL schemas. 
--- .../src/main/resources/h2/schema-metrics-v1.sql | 6 ------ .../src/main/resources/postgres/schema-metrics-v1.sql | 8 -------- 2 files changed, 14 deletions(-) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql index 0949fe59cd..6cc7649723 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql @@ -92,9 +92,6 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, - -- Principal roles (denormalized as JSON array) - principal_role_ids TEXT DEFAULT '[]', - -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', @@ -162,9 +159,6 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( total_duration_ms BIGINT DEFAULT 0, attempts INTEGER DEFAULT 1, - -- Principal roles (denormalized as JSON array) - principal_role_ids TEXT DEFAULT '[]', - -- Additional metadata (for extensibility) metadata TEXT DEFAULT '{}', diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql index 596779ad89..4207f949da 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql @@ -92,9 +92,6 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( indexed_delete_files BIGINT DEFAULT 0, total_delete_file_size_bytes BIGINT DEFAULT 0, - -- Principal roles (denormalized as JSON array) - principal_role_ids JSONB DEFAULT '[]'::JSONB, - -- Additional metadata (for extensibility) metadata JSONB DEFAULT '{}'::JSONB, @@ -107,7 +104,6 @@ COMMENT ON COLUMN scan_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; COMMENT ON COLUMN scan_metrics_report.catalog_id IS 'Catalog 
ID'; COMMENT ON COLUMN scan_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; COMMENT ON COLUMN scan_metrics_report.report_trace_id IS 'Trace ID from report metadata'; -COMMENT ON COLUMN scan_metrics_report.principal_role_ids IS 'JSON array of activated principal role IDs'; -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_scan_report_timestamp @@ -169,9 +165,6 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( total_duration_ms BIGINT DEFAULT 0, attempts INTEGER DEFAULT 1, - -- Principal roles (denormalized as JSON array) - principal_role_ids JSONB DEFAULT '[]'::JSONB, - -- Additional metadata (for extensibility) metadata JSONB DEFAULT '{}'::JSONB, @@ -183,7 +176,6 @@ COMMENT ON COLUMN commit_metrics_report.report_id IS 'Unique identifier for the COMMENT ON COLUMN commit_metrics_report.realm_id IS 'Realm ID for multi-tenancy'; COMMENT ON COLUMN commit_metrics_report.operation IS 'Commit operation type: append, overwrite, delete, replace'; COMMENT ON COLUMN commit_metrics_report.otel_trace_id IS 'OpenTelemetry trace ID from HTTP headers'; -COMMENT ON COLUMN commit_metrics_report.principal_role_ids IS 'JSON array of activated principal role IDs'; -- Index for retention cleanup by timestamp CREATE INDEX IF NOT EXISTS idx_commit_report_timestamp From 714b6142c86ec079d895d7a59c08987a860802e3 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 09:55:37 -0800 Subject: [PATCH 61/67] refactor: Remove principal_role_ids from model classes and tests Per PR review feedback from dimas-b: Polaris supports external IdP and PDP (e.g. Keycloak and OPA), and the roles stored in metrics tables may not be aligned with AuthZ decisions. 
Removed: - PRINCIPAL_ROLE_IDS column constant from model classes - getRoles() method from model interfaces - Roles parsing/serialization from fromResultSet() and toMap() - Roles-related tests from test classes --- .../jdbc/models/ModelCommitMetricsReport.java | 43 --- .../ModelCommitMetricsReportConverter.java | 22 -- .../jdbc/models/ModelScanMetricsReport.java | 43 --- .../ModelScanMetricsReportConverter.java | 22 -- .../jdbc/MetricsReportPersistenceTest.java | 291 ------------------ .../models/ModelCommitMetricsReportTest.java | 18 -- .../models/ModelScanMetricsReportTest.java | 18 -- 7 files changed, 457 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index 2a90411584..e181d702d0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -18,29 +18,20 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Nullable; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.polaris.immutables.PolarisImmutable; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; -import org.immutables.value.Value; /** Model class for commit_metrics_report table - stores commit metrics as first-class entities. 
*/ @PolarisImmutable public interface ModelCommitMetricsReport extends Converter { String TABLE_NAME = "COMMIT_METRICS_REPORT"; - /** ObjectMapper for JSON serialization/deserialization of roles. */ - ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - // Column names String REPORT_ID = "report_id"; String REALM_ID = "realm_id"; @@ -74,7 +65,6 @@ public interface ModelCommitMetricsReport extends Converter ALL_COLUMNS = @@ -111,7 +101,6 @@ public interface ModelCommitMetricsReport extends Converter getRoles() { - return Set.of(); - } - @Override default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { - // Parse principal_role_ids JSON array - Set roles = Set.of(); - String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); - if (rolesJson != null && !rolesJson.isBlank()) { - try { - roles = - new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); - } catch (JsonProcessingException e) { - // Log and continue with empty roles - roles = Set.of(); - } - } - return ImmutableModelCommitMetricsReport.builder() .reportId(rs.getString(REPORT_ID)) .realmId(rs.getString(REALM_ID)) @@ -245,7 +212,6 @@ default ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException .totalFileSizeBytes(rs.getLong(TOTAL_FILE_SIZE_BYTES)) .totalDurationMs(rs.getLong(TOTAL_DURATION_MS)) .attempts(rs.getInt(ATTEMPTS)) - .roles(roles) .metadata(rs.getString(METADATA)) .build(); } @@ -286,18 +252,9 @@ default Map toMap(DatabaseType databaseType) { map.put(TOTAL_DURATION_MS, getTotalDurationMs()); map.put(ATTEMPTS, getAttempts()); - // Serialize roles to JSON array - String rolesJson; - try { - rolesJson = OBJECT_MAPPER.writeValueAsString(getRoles()); - } catch (JsonProcessingException e) { - rolesJson = "[]"; - } if (databaseType.equals(DatabaseType.POSTGRES)) { - map.put(PRINCIPAL_ROLE_IDS, toJsonbPGobject(rolesJson)); map.put(METADATA, toJsonbPGobject(getMetadata() != null ? 
getMetadata() : "{}")); } else { - map.put(PRINCIPAL_ROLE_IDS, rolesJson); map.put(METADATA, getMetadata() != null ? getMetadata() : "{}"); } return map; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index 05e0556360..b683edd0e3 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -18,15 +18,9 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; /** @@ -35,23 +29,8 @@ */ public class ModelCommitMetricsReportConverter implements Converter { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - @Override public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException { - // Parse principal_role_ids JSON array - Set roles = Set.of(); - String rolesJson = rs.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); - if (rolesJson != null && !rolesJson.isBlank()) { - try { - roles = - new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); - } catch (JsonProcessingException e) { - // Log and continue with empty roles - roles = Set.of(); - } - } - return ImmutableModelCommitMetricsReport.builder() 
.reportId(rs.getString(ModelCommitMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) @@ -88,7 +67,6 @@ public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException .totalFileSizeBytes(rs.getLong(ModelCommitMetricsReport.TOTAL_FILE_SIZE_BYTES)) .totalDurationMs(rs.getObject(ModelCommitMetricsReport.TOTAL_DURATION_MS, Long.class)) .attempts(rs.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)) - .roles(roles) .metadata(rs.getString(ModelCommitMetricsReport.METADATA)) .build(); } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index 5d99e15507..c81a70f5b4 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -18,29 +18,20 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Nullable; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.polaris.immutables.PolarisImmutable; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; -import org.immutables.value.Value; /** Model class for scan_metrics_report table - stores scan metrics as first-class entities. 
*/ @PolarisImmutable public interface ModelScanMetricsReport extends Converter { String TABLE_NAME = "SCAN_METRICS_REPORT"; - /** ObjectMapper for JSON serialization/deserialization of roles. */ - ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - // Column names String REPORT_ID = "report_id"; String REALM_ID = "realm_id"; @@ -74,7 +65,6 @@ public interface ModelScanMetricsReport extends Converter ALL_COLUMNS = @@ -111,7 +101,6 @@ public interface ModelScanMetricsReport extends Converter getRoles() { - return Set.of(); - } - @Override default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { - // Parse principal_role_ids JSON array - Set roles = Set.of(); - String rolesJson = rs.getString(PRINCIPAL_ROLE_IDS); - if (rolesJson != null && !rolesJson.isBlank()) { - try { - roles = - new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); - } catch (JsonProcessingException e) { - // Log and continue with empty roles - roles = Set.of(); - } - } - return ImmutableModelScanMetricsReport.builder() .reportId(rs.getString(REPORT_ID)) .realmId(rs.getString(REALM_ID)) @@ -249,7 +216,6 @@ default ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { .positionalDeleteFiles(rs.getLong(POSITIONAL_DELETE_FILES)) .indexedDeleteFiles(rs.getLong(INDEXED_DELETE_FILES)) .totalDeleteFileSizeBytes(rs.getLong(TOTAL_DELETE_FILE_SIZE_BYTES)) - .roles(roles) .metadata(rs.getString(METADATA)) .build(); } @@ -290,18 +256,9 @@ default Map toMap(DatabaseType databaseType) { map.put(INDEXED_DELETE_FILES, getIndexedDeleteFiles()); map.put(TOTAL_DELETE_FILE_SIZE_BYTES, getTotalDeleteFileSizeBytes()); - // Serialize roles to JSON array - String rolesJson; - try { - rolesJson = OBJECT_MAPPER.writeValueAsString(getRoles()); - } catch (JsonProcessingException e) { - rolesJson = "[]"; - } if (databaseType.equals(DatabaseType.POSTGRES)) { - map.put(PRINCIPAL_ROLE_IDS, toJsonbPGobject(rolesJson)); map.put(METADATA, toJsonbPGobject(getMetadata() 
!= null ? getMetadata() : "{}")); } else { - map.put(PRINCIPAL_ROLE_IDS, rolesJson); map.put(METADATA, getMetadata() != null ? getMetadata() : "{}"); } return map; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index bea27ee381..1abbc0389c 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -18,15 +18,9 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; /** @@ -35,23 +29,8 @@ */ public class ModelScanMetricsReportConverter implements Converter { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - @Override public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { - // Parse principal_role_ids JSON array - Set roles = Set.of(); - String rolesJson = rs.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); - if (rolesJson != null && !rolesJson.isBlank()) { - try { - roles = - new HashSet<>(OBJECT_MAPPER.readValue(rolesJson, new TypeReference>() {})); - } catch (JsonProcessingException e) { - // Log and continue with empty roles - roles = Set.of(); - } - } - return ImmutableModelScanMetricsReport.builder() .reportId(rs.getString(ModelScanMetricsReport.REPORT_ID)) 
.realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) @@ -85,7 +64,6 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { .positionalDeleteFiles(rs.getLong(ModelScanMetricsReport.POSITIONAL_DELETE_FILES)) .indexedDeleteFiles(rs.getLong(ModelScanMetricsReport.INDEXED_DELETE_FILES)) .totalDeleteFileSizeBytes(rs.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) - .roles(roles) .metadata(rs.getString(ModelScanMetricsReport.METADATA)) .build(); } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index d77be512d7..57f460a93e 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -23,9 +23,7 @@ import java.io.InputStream; import java.sql.SQLException; -import java.util.List; import java.util.Optional; -import java.util.Set; import java.util.UUID; import javax.sql.DataSource; import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; @@ -632,295 +630,6 @@ void testDeleteAllMetricsReportsOlderThan_OlderSchema_ReturnsZero() { assertThat(deleted).isEqualTo(0); } - @Test - void testWriteScanMetricsReportWithRoles() { - ModelScanMetricsReport report = - ImmutableModelScanMetricsReport.builder() - .reportId(UUID.randomUUID().toString()) - .realmId("TEST_REALM") - .catalogId(12345L) - .namespace("db.schema") - .tableId(67890L) - .timestampMs(System.currentTimeMillis()) - .snapshotId(12345L) - .schemaId(1) - .resultDataFiles(10L) - .resultDeleteFiles(2L) - .totalFileSizeBytes(1024000L) - .totalDataManifests(5L) - .totalDeleteManifests(1L) - .scannedDataManifests(3L) - .scannedDeleteManifests(1L) - 
.skippedDataManifests(2L) - .skippedDeleteManifests(0L) - .skippedDataFiles(5L) - .skippedDeleteFiles(0L) - .totalPlanningDurationMs(150L) - .equalityDeleteFiles(1L) - .positionalDeleteFiles(1L) - .indexedDeleteFiles(0L) - .totalDeleteFileSizeBytes(10240L) - .principalName("test-user") - .requestId("req-123") - .otelTraceId("trace-abc") - .otelSpanId("span-xyz") - .reportTraceId("report-trace-roles") - .roles(Set.of("admin", "data_engineer", "analyst")) - .build(); - - // Should not throw - roles are serialized as JSON array in principal_role_ids column - persistence.writeScanMetricsReport(report); - } - - @Test - void testWriteCommitMetricsReportWithRoles() { - ModelCommitMetricsReport report = - ImmutableModelCommitMetricsReport.builder() - .reportId(UUID.randomUUID().toString()) - .realmId("TEST_REALM") - .catalogId(12345L) - .namespace("db.schema") - .tableId(67890L) - .timestampMs(System.currentTimeMillis()) - .snapshotId(12345L) - .sequenceNumber(1L) - .operation("append") - .addedDataFiles(5L) - .removedDataFiles(0L) - .totalDataFiles(100L) - .addedDeleteFiles(0L) - .removedDeleteFiles(0L) - .totalDeleteFiles(2L) - .addedEqualityDeleteFiles(0L) - .removedEqualityDeleteFiles(0L) - .addedPositionalDeleteFiles(0L) - .removedPositionalDeleteFiles(0L) - .addedRecords(1000L) - .removedRecords(0L) - .totalRecords(50000L) - .addedFileSizeBytes(102400L) - .removedFileSizeBytes(0L) - .totalFileSizeBytes(5120000L) - .totalDurationMs(250L) - .attempts(1) - .principalName("test-user") - .requestId("req-456") - .otelTraceId("trace-def") - .otelSpanId("span-uvw") - .reportTraceId("report-trace-roles") - .roles(Set.of("admin", "data_engineer")) - .build(); - - // Should not throw - roles are serialized as JSON array in principal_role_ids column - persistence.writeCommitMetricsReport(report); - } - - @Test - void testScanMetricsReportRolesAreReadBack() { - String reportId = UUID.randomUUID().toString(); - String otelTraceId = "otel-trace-roles-read-" + UUID.randomUUID(); - 
Set expectedRoles = Set.of("admin", "data_engineer", "analyst"); - - ModelScanMetricsReport report = - ImmutableModelScanMetricsReport.builder() - .reportId(reportId) - .realmId("TEST_REALM") - .catalogId(12345L) - .namespace("db.schema") - .tableId(67890L) - .timestampMs(System.currentTimeMillis()) - .snapshotId(12345L) - .schemaId(1) - .resultDataFiles(10L) - .resultDeleteFiles(2L) - .totalFileSizeBytes(1024000L) - .totalDataManifests(5L) - .totalDeleteManifests(1L) - .scannedDataManifests(3L) - .scannedDeleteManifests(1L) - .skippedDataManifests(2L) - .skippedDeleteManifests(0L) - .skippedDataFiles(5L) - .skippedDeleteFiles(0L) - .totalPlanningDurationMs(150L) - .equalityDeleteFiles(1L) - .positionalDeleteFiles(1L) - .indexedDeleteFiles(0L) - .totalDeleteFileSizeBytes(10240L) - .principalName("test-user") - .requestId("req-123") - .otelTraceId(otelTraceId) - .otelSpanId("span-xyz") - .reportTraceId("report-trace-123") - .roles(expectedRoles) - .build(); - - persistence.writeScanMetricsReport(report); - - // Query by otel trace ID and verify roles are returned - List results = - persistence.queryScanMetricsReportsByTraceId(otelTraceId); - - assertThat(results).hasSize(1); - assertThat(results.get(0).getReportId()).isEqualTo(reportId); - assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); - } - - @Test - void testCommitMetricsReportRolesAreReadBack() { - String reportId = UUID.randomUUID().toString(); - String otelTraceId = "otel-trace-commit-roles-read-" + UUID.randomUUID(); - Set expectedRoles = Set.of("admin", "data_engineer"); - - ModelCommitMetricsReport report = - ImmutableModelCommitMetricsReport.builder() - .reportId(reportId) - .realmId("TEST_REALM") - .catalogId(12345L) - .namespace("db.schema") - .tableId(67890L) - .timestampMs(System.currentTimeMillis()) - .snapshotId(12345L) - .sequenceNumber(1L) - .operation("append") - .addedDataFiles(5L) - .removedDataFiles(0L) - .totalDataFiles(100L) - .addedDeleteFiles(0L) - 
.removedDeleteFiles(0L) - .totalDeleteFiles(2L) - .addedEqualityDeleteFiles(0L) - .removedEqualityDeleteFiles(0L) - .addedPositionalDeleteFiles(0L) - .removedPositionalDeleteFiles(0L) - .addedRecords(1000L) - .removedRecords(0L) - .totalRecords(50000L) - .addedFileSizeBytes(102400L) - .removedFileSizeBytes(0L) - .totalFileSizeBytes(5120000L) - .totalDurationMs(250L) - .attempts(1) - .principalName("test-user") - .requestId("req-456") - .otelTraceId(otelTraceId) - .otelSpanId("span-uvw") - .reportTraceId("report-trace-456") - .roles(expectedRoles) - .build(); - - persistence.writeCommitMetricsReport(report); - - // Query by otel trace ID and verify roles are returned - List results = - persistence.queryCommitMetricsReportsByTraceId(otelTraceId); - - assertThat(results).hasSize(1); - assertThat(results.get(0).getReportId()).isEqualTo(reportId); - assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); - } - - @Test - void testScanMetricsReportWithEmptyRoles() { - String reportId = UUID.randomUUID().toString(); - String otelTraceId = "otel-trace-empty-roles-" + UUID.randomUUID(); - - ModelScanMetricsReport report = - ImmutableModelScanMetricsReport.builder() - .reportId(reportId) - .realmId("TEST_REALM") - .catalogId(12345L) - .namespace("db.schema") - .tableId(67890L) - .timestampMs(System.currentTimeMillis()) - .snapshotId(12345L) - .schemaId(1) - .resultDataFiles(10L) - .resultDeleteFiles(2L) - .totalFileSizeBytes(1024000L) - .totalDataManifests(5L) - .totalDeleteManifests(1L) - .scannedDataManifests(3L) - .scannedDeleteManifests(1L) - .skippedDataManifests(2L) - .skippedDeleteManifests(0L) - .skippedDataFiles(5L) - .skippedDeleteFiles(0L) - .totalPlanningDurationMs(150L) - .equalityDeleteFiles(1L) - .positionalDeleteFiles(1L) - .indexedDeleteFiles(0L) - .totalDeleteFileSizeBytes(10240L) - .principalName("test-user") - .requestId("req-123") - .otelTraceId(otelTraceId) - .otelSpanId("span-xyz") - .reportTraceId("report-trace-empty") 
- // No roles set - uses default empty set - .build(); - - persistence.writeScanMetricsReport(report); - - // Query by otel trace ID and verify empty roles - List results = - persistence.queryScanMetricsReportsByTraceId(otelTraceId); - - assertThat(results).hasSize(1); - assertThat(results.get(0).getRoles()).isEmpty(); - } - - @Test - void testScanMetricsReportRolesViaTimeRangeQuery() { - String reportId = UUID.randomUUID().toString(); - long timestamp = System.currentTimeMillis(); - Set expectedRoles = Set.of("role1", "role2"); - - ModelScanMetricsReport report = - ImmutableModelScanMetricsReport.builder() - .reportId(reportId) - .realmId("TEST_REALM") - .catalogId(22222L) - .namespace("db.schema") - .tableId(66666L) - .timestampMs(timestamp) - .snapshotId(12345L) - .schemaId(1) - .resultDataFiles(10L) - .resultDeleteFiles(2L) - .totalFileSizeBytes(1024000L) - .totalDataManifests(5L) - .totalDeleteManifests(1L) - .scannedDataManifests(3L) - .scannedDeleteManifests(1L) - .skippedDataManifests(2L) - .skippedDeleteManifests(0L) - .skippedDataFiles(5L) - .skippedDeleteFiles(0L) - .totalPlanningDurationMs(150L) - .equalityDeleteFiles(1L) - .positionalDeleteFiles(1L) - .indexedDeleteFiles(0L) - .totalDeleteFileSizeBytes(10240L) - .principalName("test-user") - .requestId("req-123") - .otelTraceId("trace-abc") - .otelSpanId("span-xyz") - .reportTraceId("report-trace-time-query") - .roles(expectedRoles) - .build(); - - persistence.writeScanMetricsReport(report); - - // Query by time range and verify roles are returned - List results = - persistence.queryScanMetricsReports( - 22222L, 66666L, timestamp - 1000, timestamp + 1000, null, 100); - - assertThat(results).hasSize(1); - assertThat(results.get(0).getReportId()).isEqualTo(reportId); - assertThat(results.get(0).getRoles()).containsExactlyInAnyOrderElementsOf(expectedRoles); - } - /** * Creates a JdbcBasePersistenceImpl with the specified schema version. 
This uses the same * datasource but with a different reported schema version to test graceful degradation. diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java index 0ad9a412c6..557ee6a9c9 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -18,7 +18,6 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -26,7 +25,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; -import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; import org.junit.jupiter.api.Test; import org.postgresql.util.PGobject; @@ -65,8 +63,6 @@ public class ModelCommitMetricsReportTest { private static final long TEST_TOTAL_FILE_SIZE = 10240000L; private static final long TEST_TOTAL_DURATION = 250L; private static final int TEST_ATTEMPTS = 1; - private static final Set TEST_ROLES = Set.of("admin", "data_engineer", "analyst"); - private static final String TEST_ROLES_JSON = "[\"admin\",\"data_engineer\",\"analyst\"]"; private static final String TEST_METADATA = "{\"commit\":\"info\"}"; @Test @@ -127,8 +123,6 @@ public void testFromResultSet() throws SQLException { when(mockResultSet.getLong(ModelCommitMetricsReport.TOTAL_DURATION_MS)) .thenReturn(TEST_TOTAL_DURATION); when(mockResultSet.getInt(ModelCommitMetricsReport.ATTEMPTS)).thenReturn(TEST_ATTEMPTS); - 
when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)) - .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelCommitMetricsReport result = @@ -146,7 +140,6 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_ADDED_RECORDS, result.getAddedRecords()); assertEquals(TEST_TOTAL_DURATION, result.getTotalDurationMs()); assertEquals(TEST_ATTEMPTS, result.getAttempts()); - assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -161,9 +154,6 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_SNAPSHOT_ID, resultMap.get(ModelCommitMetricsReport.SNAPSHOT_ID)); assertEquals(TEST_OPERATION, resultMap.get(ModelCommitMetricsReport.OPERATION)); assertEquals(TEST_ADDED_DATA_FILES, resultMap.get(ModelCommitMetricsReport.ADDED_DATA_FILES)); - // principal_role_ids should be serialized as a JSON string for H2 - String rolesJson = (String) resultMap.get(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); - assertThat(rolesJson).contains("admin", "data_engineer", "analyst"); assertEquals(TEST_METADATA, resultMap.get(ModelCommitMetricsReport.METADATA)); } @@ -174,10 +164,6 @@ public void testToMapWithPostgresType() { Map resultMap = report.toMap(DatabaseType.POSTGRES); assertEquals(TEST_REPORT_ID, resultMap.get(ModelCommitMetricsReport.REPORT_ID)); - // principal_role_ids should be serialized as a PGobject with type "jsonb" for Postgres - PGobject rolesPgObject = (PGobject) resultMap.get(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS); - assertEquals("jsonb", rolesPgObject.getType()); - assertThat(rolesPgObject.getValue()).contains("admin", "data_engineer", "analyst"); PGobject pgObject = (PGobject) resultMap.get(ModelCommitMetricsReport.METADATA); assertEquals("jsonb", pgObject.getType()); assertEquals(TEST_METADATA, pgObject.getValue()); @@ -246,8 +232,6 @@ public void 
testConverterFromResultSet() throws SQLException { .thenReturn(TEST_TOTAL_DURATION); when(mockResultSet.getObject(ModelCommitMetricsReport.ATTEMPTS, Integer.class)) .thenReturn(TEST_ATTEMPTS); - when(mockResultSet.getString(ModelCommitMetricsReport.PRINCIPAL_ROLE_IDS)) - .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelCommitMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelCommitMetricsReport result = converter.fromResultSet(mockResultSet); @@ -255,7 +239,6 @@ public void testConverterFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -291,7 +274,6 @@ private ModelCommitMetricsReport createTestReport() { .totalFileSizeBytes(TEST_TOTAL_FILE_SIZE) .totalDurationMs(TEST_TOTAL_DURATION) .attempts(TEST_ATTEMPTS) - .roles(TEST_ROLES) .metadata(TEST_METADATA) .build(); } diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java index 5edb05b7b9..d2186297c7 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -18,7 +18,6 @@ */ package org.apache.polaris.persistence.relational.jdbc.models; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -26,7 +25,6 @@ import java.sql.ResultSet; import 
java.sql.SQLException; import java.util.Map; -import java.util.Set; import org.apache.polaris.persistence.relational.jdbc.DatabaseType; import org.junit.jupiter.api.Test; import org.postgresql.util.PGobject; @@ -65,8 +63,6 @@ public class ModelScanMetricsReportTest { private static final long TEST_POSITIONAL_DELETE_FILES = 1L; private static final long TEST_INDEXED_DELETE_FILES = 0L; private static final long TEST_DELETE_FILE_SIZE = 2048L; - private static final Set TEST_ROLES = Set.of("admin", "data_engineer", "analyst"); - private static final String TEST_ROLES_JSON = "[\"admin\",\"data_engineer\",\"analyst\"]"; private static final String TEST_METADATA = "{\"custom\":\"value\"}"; @Test @@ -127,8 +123,6 @@ public void testFromResultSet() throws SQLException { .thenReturn(TEST_INDEXED_DELETE_FILES); when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) .thenReturn(TEST_DELETE_FILE_SIZE); - when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)) - .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelScanMetricsReport result = ModelScanMetricsReport.CONVERTER.fromResultSet(mockResultSet); @@ -146,7 +140,6 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_RESULT_DATA_FILES, result.getResultDataFiles()); assertEquals(TEST_TOTAL_FILE_SIZE, result.getTotalFileSizeBytes()); assertEquals(TEST_PLANNING_DURATION, result.getTotalPlanningDurationMs()); - assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -163,9 +156,6 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_TABLE_ID, resultMap.get(ModelScanMetricsReport.TABLE_ID_COL)); assertEquals(TEST_TIMESTAMP_MS, resultMap.get(ModelScanMetricsReport.TIMESTAMP_MS)); assertEquals(TEST_RESULT_DATA_FILES, resultMap.get(ModelScanMetricsReport.RESULT_DATA_FILES)); - // principal_role_ids should be 
serialized as a JSON string for H2 - String rolesJson = (String) resultMap.get(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); - assertThat(rolesJson).contains("admin", "data_engineer", "analyst"); assertEquals(TEST_METADATA, resultMap.get(ModelScanMetricsReport.METADATA)); } @@ -176,10 +166,6 @@ public void testToMapWithPostgresType() { Map resultMap = report.toMap(DatabaseType.POSTGRES); assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); - // principal_role_ids should be serialized as a PGobject with type "jsonb" for Postgres - PGobject rolesPgObject = (PGobject) resultMap.get(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS); - assertEquals("jsonb", rolesPgObject.getType()); - assertThat(rolesPgObject.getValue()).contains("admin", "data_engineer", "analyst"); PGobject pgObject = (PGobject) resultMap.get(ModelScanMetricsReport.METADATA); assertEquals("jsonb", pgObject.getType()); assertEquals(TEST_METADATA, pgObject.getValue()); @@ -246,8 +232,6 @@ public void testConverterFromResultSet() throws SQLException { .thenReturn(TEST_INDEXED_DELETE_FILES); when(mockResultSet.getLong(ModelScanMetricsReport.TOTAL_DELETE_FILE_SIZE_BYTES)) .thenReturn(TEST_DELETE_FILE_SIZE); - when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_ROLE_IDS)) - .thenReturn(TEST_ROLES_JSON); when(mockResultSet.getString(ModelScanMetricsReport.METADATA)).thenReturn(TEST_METADATA); ModelScanMetricsReport result = converter.fromResultSet(mockResultSet); @@ -255,7 +239,6 @@ public void testConverterFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertThat(result.getRoles()).containsExactlyInAnyOrderElementsOf(TEST_ROLES); assertEquals(TEST_METADATA, result.getMetadata()); } @@ -287,7 +270,6 @@ private ModelScanMetricsReport createTestReport() { .positionalDeleteFiles(TEST_POSITIONAL_DELETE_FILES) 
.indexedDeleteFiles(TEST_INDEXED_DELETE_FILES) .totalDeleteFileSizeBytes(TEST_DELETE_FILE_SIZE) - .roles(TEST_ROLES) .metadata(TEST_METADATA) .build(); } From 4d0d395d6335276a9976467dc599905c55c9a68c Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 10:19:49 -0800 Subject: [PATCH 62/67] refactor: Move metrics methods to JdbcMetricsPersistence and wire to CDI Addresses PR review comments from dimas-b: - r2775335558: Why not keep these methods in JdbcMetricsPersistence? - r2775346205: Do you plan wiring JdbcMetricsPersistence to CDI? Changes: - Move all metrics persistence methods from JdbcBasePersistenceImpl to JdbcMetricsPersistence, making it self-contained - Create JdbcMetricsPersistenceProducer as a CDI producer that creates JdbcMetricsPersistence instances for the relational-jdbc backend - Update ServiceProducers.metricsPersistence() to use Instance pattern to select the appropriate implementation based on persistence type - Update MetricsReportPersistenceTest to use JdbcMetricsPersistence directly --- .../jdbc/JdbcBasePersistenceImpl.java | 353 ------------------ .../jdbc/JdbcMetricsPersistence.java | 349 ++++++++++++++++- .../jdbc/JdbcMetricsPersistenceProducer.java | 100 +++++ .../jdbc/MetricsReportPersistenceTest.java | 96 ++--- .../service/config/ServiceProducers.java | 19 +- 5 files changed, 486 insertions(+), 431 deletions(-) create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java index cb8dce09d0..9401df2dd0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java +++ 
b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java @@ -69,15 +69,11 @@ import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.apache.polaris.core.storage.StorageLocation; import org.apache.polaris.persistence.relational.jdbc.models.EntityNameLookupRecordConverter; -import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity; import org.apache.polaris.persistence.relational.jdbc.models.ModelEvent; import org.apache.polaris.persistence.relational.jdbc.models.ModelGrantRecord; import org.apache.polaris.persistence.relational.jdbc.models.ModelPolicyMappingRecord; import org.apache.polaris.persistence.relational.jdbc.models.ModelPrincipalAuthenticationData; -import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; -import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.SchemaVersion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -96,10 +92,6 @@ public class JdbcBasePersistenceImpl implements BasePersistence, IntegrationPers // The max number of components a location can have before the optimized sibling check is not used private static final int MAX_LOCATION_COMPONENTS = 40; - // Minimum schema version that includes metrics tables (scan_metrics_report, - // commit_metrics_report) - private static final int METRICS_TABLES_MIN_SCHEMA_VERSION = 4; - public JdbcBasePersistenceImpl( PolarisDiagnostics diagnostics, DatasourceOperations databaseOperations, @@ -115,18 +107,6 @@ public JdbcBasePersistenceImpl( this.schemaVersion = schemaVersion; } - /** - * Returns true if the current schema version supports metrics persistence tables. - * - *

      Metrics tables (scan_metrics_report, commit_metrics_report) were introduced in schema - * version 4. On older schemas, metrics persistence operations will be no-ops. - * - * @return true if schema version >= 4, false otherwise - */ - public boolean supportsMetricsPersistence() { - return this.schemaVersion >= METRICS_TABLES_MIN_SCHEMA_VERSION; - } - @Override public long generateNewId(@Nonnull PolarisCallContext callCtx) { return IdGenerator.getIdGenerator().nextId(); @@ -335,339 +315,6 @@ public void writeEvents(@Nonnull List events) { } } - /** - * Writes a scan metrics report to the database as a first-class entity. - * - *

      This method requires schema version 4 or higher. On older schemas, this method is a no-op. - * - * @param report the scan metrics report to persist - */ - public void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { - if (!supportsMetricsPersistence()) { - LOGGER.debug( - "Schema version {} does not support metrics tables. Skipping scan metrics write.", - schemaVersion); - return; - } - try { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelScanMetricsReport.ALL_COLUMNS, - ModelScanMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); - int updated = datasourceOperations.executeUpdate(pq); - if (updated == 0) { - throw new SQLException("Scan metrics report was not inserted."); - } - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); - } - } - - /** - * Writes a commit metrics report to the database as a first-class entity. - * - *

      This method requires schema version 4 or higher. On older schemas, this method is a no-op. - * - * @param report the commit metrics report to persist - */ - public void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { - if (!supportsMetricsPersistence()) { - LOGGER.debug( - "Schema version {} does not support metrics tables. Skipping commit metrics write.", - schemaVersion); - return; - } - try { - PreparedQuery pq = - QueryGenerator.generateInsertQueryWithoutRealmId( - ModelCommitMetricsReport.ALL_COLUMNS, - ModelCommitMetricsReport.TABLE_NAME, - report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); - int updated = datasourceOperations.executeUpdate(pq); - if (updated == 0) { - throw new SQLException("Commit metrics report was not inserted."); - } - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); - } - } - - /** - * Retrieves scan metrics reports for a specific table within a time range. - * - *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. - * - * @param catalogId the catalog entity ID - * @param tableId the table entity ID - * @param startTimeMs start of time range (inclusive), or null for no lower bound - * @param endTimeMs end of time range (exclusive), or null for no upper bound - * @param limit maximum number of results to return - * @return list of scan metrics reports matching the criteria, or empty list if schema version - * < 4 - */ - /** - * Retrieves scan metrics reports for a specific table within a time range. - * - *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. - * - * @param catalogId the catalog entity ID - * @param tableId the table entity ID - * @param startTimeMs start of time range (inclusive), or null for no lower bound - * @param endTimeMs end of time range (exclusive), or null for no upper bound - * @param lastReportId cursor for pagination: return results after this report ID, or null for - * first page - * @param limit maximum number of results to return - * @return list of scan metrics reports matching the criteria, or empty list if schema version - * < 4 - */ - @Nonnull - public List queryScanMetricsReports( - long catalogId, - long tableId, - @Nullable Long startTimeMs, - @Nullable Long endTimeMs, - @Nullable String lastReportId, - int limit) { - if (!supportsMetricsPersistence()) { - return Collections.emptyList(); - } - try { - StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND table_id = ?"); - List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); - - if (startTimeMs != null) { - whereClause.append(" AND timestamp_ms >= ?"); - values.add(startTimeMs); - } - if (endTimeMs != null) { - whereClause.append(" AND timestamp_ms < ?"); - values.add(endTimeMs); - } - if (lastReportId != null) { - whereClause.append(" AND report_id > ?"); - values.add(lastReportId); - } - - String sql = - "SELECT * FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) - + " WHERE " - + whereClause - + " ORDER BY report_id ASC LIMIT " - + limit; - - PreparedQuery query = new PreparedQuery(sql, values); - var results = - datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - return results == null ? 
Collections.emptyList() : results; - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to query scan metrics reports due to %s", e.getMessage()), e); - } - } - - /** - * Retrieves commit metrics reports for a specific table within a time range. - * - *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. - * - * @param catalogId the catalog entity ID - * @param tableId the table entity ID - * @param startTimeMs start of time range (inclusive), or null for no lower bound - * @param endTimeMs end of time range (exclusive), or null for no upper bound - * @param lastReportId cursor for pagination: return results after this report ID, or null for - * first page - * @param limit maximum number of results to return - * @return list of commit metrics reports matching the criteria, or empty list if schema version - * < 4 - */ - @Nonnull - public List queryCommitMetricsReports( - long catalogId, - long tableId, - @Nullable Long startTimeMs, - @Nullable Long endTimeMs, - @Nullable String lastReportId, - int limit) { - if (!supportsMetricsPersistence()) { - return Collections.emptyList(); - } - try { - List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); - - StringBuilder whereClause = new StringBuilder(); - whereClause.append("realm_id = ? AND catalog_id = ? AND table_id = ?"); - - if (startTimeMs != null) { - whereClause.append(" AND timestamp_ms >= ?"); - values.add(startTimeMs); - } - if (endTimeMs != null) { - whereClause.append(" AND timestamp_ms < ?"); - values.add(endTimeMs); - } - if (lastReportId != null) { - whereClause.append(" AND report_id > ?"); - values.add(lastReportId); - } - - String sql = - "SELECT * FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) - + " WHERE " - + whereClause - + " ORDER BY report_id ASC LIMIT " - + limit; - - PreparedQuery query = new PreparedQuery(sql, values); - var results = - datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - return results == null ? 
Collections.emptyList() : results; - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to query commit metrics reports due to %s", e.getMessage()), e); - } - } - - /** - * Retrieves scan metrics reports by OpenTelemetry trace ID. - * - *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. - * - * @param traceId the OpenTelemetry trace ID - * @return list of scan metrics reports with the given trace ID, or empty list if schema version - * < 4 - */ - @Nonnull - public List queryScanMetricsReportsByTraceId(@Nonnull String traceId) { - if (!supportsMetricsPersistence()) { - return Collections.emptyList(); - } - try { - String sql = - "SELECT * FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) - + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; - - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); - var results = - datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); - return results == null ? Collections.emptyList() : results; - } catch (SQLException e) { - throw new RuntimeException( - String.format( - "Failed to query scan metrics reports by trace ID due to %s", e.getMessage()), - e); - } - } - - /** - * Retrieves commit metrics reports by OpenTelemetry trace ID. - * - *

      This method requires schema version 4 or higher. On older schemas, returns an empty list. - * - * @param traceId the OpenTelemetry trace ID - * @return list of commit metrics reports with the given trace ID, or empty list if schema version - * < 4 - */ - @Nonnull - public List queryCommitMetricsReportsByTraceId( - @Nonnull String traceId) { - if (!supportsMetricsPersistence()) { - return Collections.emptyList(); - } - try { - String sql = - "SELECT * FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) - + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; - - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); - var results = - datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); - return results == null ? Collections.emptyList() : results; - } catch (SQLException e) { - throw new RuntimeException( - String.format( - "Failed to query commit metrics reports by trace ID due to %s", e.getMessage()), - e); - } - } - - /** - * Deletes scan metrics reports older than the specified timestamp. - * - *

      This method requires schema version 4 or higher. On older schemas, returns 0. - * - * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be - * deleted - * @return the number of reports deleted, or 0 if schema version < 4 - */ - public int deleteScanMetricsReportsOlderThan(long olderThanMs) { - if (!supportsMetricsPersistence()) { - return 0; - } - try { - String sql = - "DELETE FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) - + " WHERE realm_id = ? AND timestamp_ms < ?"; - - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); - return datasourceOperations.executeUpdate(query); - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to delete old scan metrics reports due to %s", e.getMessage()), e); - } - } - - /** - * Deletes commit metrics reports older than the specified timestamp. - * - *

      This method requires schema version 4 or higher. On older schemas, returns 0. - * - * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be - * deleted - * @return the number of reports deleted, or 0 if schema version < 4 - */ - public int deleteCommitMetricsReportsOlderThan(long olderThanMs) { - if (!supportsMetricsPersistence()) { - return 0; - } - try { - String sql = - "DELETE FROM " - + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) - + " WHERE realm_id = ? AND timestamp_ms < ?"; - - PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); - return datasourceOperations.executeUpdate(query); - } catch (SQLException e) { - throw new RuntimeException( - String.format("Failed to delete old commit metrics reports due to %s", e.getMessage()), - e); - } - } - - /** - * Deletes all metrics reports (both scan and commit) older than the specified timestamp. - * - *

      This method requires schema version 4 or higher. On older schemas, returns 0. - * - * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be - * deleted - * @return the total number of reports deleted (scan + commit), or 0 if schema version < 4 - */ - public int deleteAllMetricsReportsOlderThan(long olderThanMs) { - int scanDeleted = deleteScanMetricsReportsOlderThan(olderThanMs); - int commitDeleted = deleteCommitMetricsReportsOlderThan(olderThanMs); - return scanDeleted + commitDeleted; - } - @Override public void deleteEntity(@Nonnull PolarisCallContext callCtx, @Nonnull PolarisBaseEntity entity) { ModelEntity modelEntity = ModelEntity.fromEntity(entity, schemaVersion); diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java index e9b2244bc8..a4621b5397 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistence.java @@ -18,7 +18,13 @@ */ package org.apache.polaris.persistence.relational.jdbc; +import static org.apache.polaris.persistence.relational.jdbc.QueryGenerator.PreparedQuery; + import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; @@ -30,54 +36,85 @@ import org.apache.polaris.core.persistence.pagination.PageToken; import org.apache.polaris.core.persistence.pagination.Token; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; +import 
org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReportConverter; import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReport; +import org.apache.polaris.persistence.relational.jdbc.models.ModelScanMetricsReportConverter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * JDBC implementation of {@link MetricsPersistence}. * - *

      This class bridges the SPI interface with the existing JDBC persistence implementation, - * converting between SPI record types ({@link ScanMetricsRecord}, {@link CommitMetricsRecord}) and - * JDBC model types ({@link ModelScanMetricsReport}, {@link ModelCommitMetricsReport}). + *

      This class provides direct JDBC persistence for metrics reports, converting between SPI record + * types ({@link ScanMetricsRecord}, {@link CommitMetricsRecord}) and JDBC model types ({@link + * ModelScanMetricsReport}, {@link ModelCommitMetricsReport}). + * + *

      Metrics tables (scan_metrics_report, commit_metrics_report) were introduced in schema version + * 4. On older schemas, all operations are no-ops. */ public class JdbcMetricsPersistence implements MetricsPersistence { - private final JdbcBasePersistenceImpl jdbcPersistence; + private static final Logger LOGGER = LoggerFactory.getLogger(JdbcMetricsPersistence.class); + + // Minimum schema version that includes metrics tables + private static final int METRICS_TABLES_MIN_SCHEMA_VERSION = 4; + + private final DatasourceOperations datasourceOperations; private final String realmId; + private final int schemaVersion; /** * Creates a new JdbcMetricsPersistence instance. * - * @param jdbcPersistence the underlying JDBC persistence implementation + * @param datasourceOperations the datasource operations for JDBC access * @param realmId the realm ID for multi-tenancy + * @param schemaVersion the current schema version */ - public JdbcMetricsPersistence(JdbcBasePersistenceImpl jdbcPersistence, String realmId) { - this.jdbcPersistence = jdbcPersistence; + public JdbcMetricsPersistence( + DatasourceOperations datasourceOperations, String realmId, int schemaVersion) { + this.datasourceOperations = datasourceOperations; this.realmId = realmId; + this.schemaVersion = schemaVersion; + } + + /** + * Returns true if the current schema version supports metrics persistence tables. + * + * @return true if schema version >= 4, false otherwise + */ + public boolean supportsMetricsPersistence() { + return this.schemaVersion >= METRICS_TABLES_MIN_SCHEMA_VERSION; } @Override public void writeScanReport(@Nonnull ScanMetricsRecord record) { - if (!jdbcPersistence.supportsMetricsPersistence()) { + if (!supportsMetricsPersistence()) { + LOGGER.debug( + "Schema version {} does not support metrics tables. 
Skipping scan metrics write.", + schemaVersion); return; } ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, realmId); - jdbcPersistence.writeScanMetricsReport(model); + writeScanMetricsReport(model); } @Override public void writeCommitReport(@Nonnull CommitMetricsRecord record) { - if (!jdbcPersistence.supportsMetricsPersistence()) { + if (!supportsMetricsPersistence()) { + LOGGER.debug( + "Schema version {} does not support metrics tables. Skipping commit metrics write.", + schemaVersion); return; } ModelCommitMetricsReport model = SpiModelConverter.toModelCommitReport(record, realmId); - jdbcPersistence.writeCommitMetricsReport(model); + writeCommitMetricsReport(model); } @Override @Nonnull public Page queryScanReports( @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { - if (!jdbcPersistence.supportsMetricsPersistence()) { + if (!supportsMetricsPersistence()) { return Page.fromItems(List.of()); } @@ -95,7 +132,7 @@ public Page queryScanReports( pageToken.valueAs(ReportIdToken.class).map(ReportIdToken::reportId).orElse(null); List models = - jdbcPersistence.queryScanMetricsReports( + queryScanMetricsReports( criteria.catalogId().getAsLong(), criteria.tableId().getAsLong(), startTimeMs, @@ -117,7 +154,7 @@ public Page queryScanReports( @Nonnull public Page queryCommitReports( @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { - if (!jdbcPersistence.supportsMetricsPersistence()) { + if (!supportsMetricsPersistence()) { return Page.fromItems(List.of()); } @@ -135,7 +172,7 @@ public Page queryCommitReports( pageToken.valueAs(ReportIdToken.class).map(ReportIdToken::reportId).orElse(null); List models = - jdbcPersistence.queryCommitMetricsReports( + queryCommitMetricsReports( criteria.catalogId().getAsLong(), criteria.tableId().getAsLong(), startTimeMs, @@ -152,4 +189,286 @@ public Page queryCommitReports( return Page.page(pageToken, records, nextToken); } + + // ========== Internal JDBC methods 
========== + + /** + * Writes a scan metrics report to the database. + * + * @param report the scan metrics report to persist + */ + void writeScanMetricsReport(@Nonnull ModelScanMetricsReport report) { + try { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelScanMetricsReport.ALL_COLUMNS, + ModelScanMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Scan metrics report was not inserted."); + } + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to write scan metrics report due to %s", e.getMessage()), e); + } + } + + /** + * Writes a commit metrics report to the database. + * + * @param report the commit metrics report to persist + */ + void writeCommitMetricsReport(@Nonnull ModelCommitMetricsReport report) { + try { + PreparedQuery pq = + QueryGenerator.generateInsertQueryWithoutRealmId( + ModelCommitMetricsReport.ALL_COLUMNS, + ModelCommitMetricsReport.TABLE_NAME, + report.toMap(datasourceOperations.getDatabaseType()).values().stream().toList()); + int updated = datasourceOperations.executeUpdate(pq); + if (updated == 0) { + throw new SQLException("Commit metrics report was not inserted."); + } + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to write commit metrics report due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves scan metrics reports for a specific table within a time range. 
+ * + * @param catalogId the catalog entity ID + * @param tableId the table entity ID + * @param startTimeMs start of time range (inclusive), or null for no lower bound + * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param lastReportId cursor for pagination: return results after this report ID, or null for + * first page + * @param limit maximum number of results to return + * @return list of scan metrics reports matching the criteria + */ + @Nonnull + List queryScanMetricsReports( + long catalogId, + long tableId, + @Nullable Long startTimeMs, + @Nullable Long endTimeMs, + @Nullable String lastReportId, + int limit) { + try { + StringBuilder whereClause = new StringBuilder(); + whereClause.append("realm_id = ? AND catalog_id = ? AND table_id = ?"); + List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); + + if (startTimeMs != null) { + whereClause.append(" AND timestamp_ms >= ?"); + values.add(startTimeMs); + } + if (endTimeMs != null) { + whereClause.append(" AND timestamp_ms < ?"); + values.add(endTimeMs); + } + if (lastReportId != null) { + whereClause.append(" AND report_id > ?"); + values.add(lastReportId); + } + + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE " + + whereClause + + " ORDER BY report_id ASC LIMIT " + + limit; + + PreparedQuery query = new PreparedQuery(sql, values); + var results = + datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to query scan metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves commit metrics reports for a specific table within a time range. 
+ * + * @param catalogId the catalog entity ID + * @param tableId the table entity ID + * @param startTimeMs start of time range (inclusive), or null for no lower bound + * @param endTimeMs end of time range (exclusive), or null for no upper bound + * @param lastReportId cursor for pagination: return results after this report ID, or null for + * first page + * @param limit maximum number of results to return + * @return list of commit metrics reports matching the criteria + */ + @Nonnull + List queryCommitMetricsReports( + long catalogId, + long tableId, + @Nullable Long startTimeMs, + @Nullable Long endTimeMs, + @Nullable String lastReportId, + int limit) { + try { + List values = new ArrayList<>(List.of(realmId, catalogId, tableId)); + + StringBuilder whereClause = new StringBuilder(); + whereClause.append("realm_id = ? AND catalog_id = ? AND table_id = ?"); + + if (startTimeMs != null) { + whereClause.append(" AND timestamp_ms >= ?"); + values.add(startTimeMs); + } + if (endTimeMs != null) { + whereClause.append(" AND timestamp_ms < ?"); + values.add(endTimeMs); + } + if (lastReportId != null) { + whereClause.append(" AND report_id > ?"); + values.add(lastReportId); + } + + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE " + + whereClause + + " ORDER BY report_id ASC LIMIT " + + limit; + + PreparedQuery query = new PreparedQuery(sql, values); + var results = + datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to query commit metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Retrieves scan metrics reports by OpenTelemetry trace ID. 
+ * + * @param traceId the OpenTelemetry trace ID + * @return list of scan metrics reports with the given trace ID + */ + @Nonnull + public List queryScanMetricsReportsByTraceId(@Nonnull String traceId) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } + try { + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); + var results = + datasourceOperations.executeSelect(query, new ModelScanMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format( + "Failed to query scan metrics reports by trace ID due to %s", e.getMessage()), + e); + } + } + + /** + * Retrieves commit metrics reports by OpenTelemetry trace ID. + * + * @param traceId the OpenTelemetry trace ID + * @return list of commit metrics reports with the given trace ID + */ + @Nonnull + public List queryCommitMetricsReportsByTraceId( + @Nonnull String traceId) { + if (!supportsMetricsPersistence()) { + return Collections.emptyList(); + } + try { + String sql = + "SELECT * FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND otel_trace_id = ? ORDER BY timestamp_ms DESC"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, traceId)); + var results = + datasourceOperations.executeSelect(query, new ModelCommitMetricsReportConverter()); + return results == null ? Collections.emptyList() : results; + } catch (SQLException e) { + throw new RuntimeException( + String.format( + "Failed to query commit metrics reports by trace ID due to %s", e.getMessage()), + e); + } + } + + /** + * Deletes scan metrics reports older than the specified timestamp. 
+ * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the number of reports deleted, or 0 if schema version < 4 + */ + public int deleteScanMetricsReportsOlderThan(long olderThanMs) { + if (!supportsMetricsPersistence()) { + return 0; + } + try { + String sql = + "DELETE FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelScanMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND timestamp_ms < ?"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); + return datasourceOperations.executeUpdate(query); + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to delete old scan metrics reports due to %s", e.getMessage()), e); + } + } + + /** + * Deletes commit metrics reports older than the specified timestamp. + * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the number of reports deleted, or 0 if schema version < 4 + */ + public int deleteCommitMetricsReportsOlderThan(long olderThanMs) { + if (!supportsMetricsPersistence()) { + return 0; + } + try { + String sql = + "DELETE FROM " + + QueryGenerator.getFullyQualifiedTableName(ModelCommitMetricsReport.TABLE_NAME) + + " WHERE realm_id = ? AND timestamp_ms < ?"; + + PreparedQuery query = new PreparedQuery(sql, List.of(realmId, olderThanMs)); + return datasourceOperations.executeUpdate(query); + } catch (SQLException e) { + throw new RuntimeException( + String.format("Failed to delete old commit metrics reports due to %s", e.getMessage()), + e); + } + } + + /** + * Deletes all metrics reports (both scan and commit) older than the specified timestamp. 
+ * + * @param olderThanMs timestamp in milliseconds; reports with timestamp_ms less than this will be + * deleted + * @return the total number of reports deleted (scan + commit), or 0 if schema version < 4 + */ + public int deleteAllMetricsReportsOlderThan(long olderThanMs) { + int scanDeleted = deleteScanMetricsReportsOlderThan(olderThanMs); + int commitDeleted = deleteCommitMetricsReportsOlderThan(olderThanMs); + return scanDeleted + commitDeleted; + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java new file mode 100644 index 0000000000..f566e65c9a --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.relational.jdbc; + +import io.smallrye.common.annotation.Identifier; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.context.RequestScoped; +import jakarta.enterprise.inject.Instance; +import jakarta.enterprise.inject.Produces; +import jakarta.inject.Inject; +import java.sql.SQLException; +import javax.sql.DataSource; +import org.apache.polaris.core.config.BehaviorChangeConfiguration; +import org.apache.polaris.core.config.RealmConfig; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * CDI producer for {@link MetricsPersistence} in the JDBC persistence backend. + * + *

      This producer creates {@link JdbcMetricsPersistence} instances when the JDBC persistence + * backend is in use. When metrics tables are not available (schema version < 4), the produced + * instance will report this via {@link JdbcMetricsPersistence#supportsMetricsPersistence()}. + */ +@ApplicationScoped +@Identifier("relational-jdbc") +public class JdbcMetricsPersistenceProducer { + + private static final Logger LOGGER = + LoggerFactory.getLogger(JdbcMetricsPersistenceProducer.class); + + @Inject Instance dataSource; + + @Inject RelationalJdbcConfiguration relationalJdbcConfiguration; + + @Inject RealmContext realmContext; + + @Inject RealmConfig realmConfig; + + /** + * Produces a {@link MetricsPersistence} instance for the current request. + * + *

      This method creates a new {@link JdbcMetricsPersistence} configured with the current realm + * and schema version. If the schema version is less than 4 (which includes metrics tables), the + * returned instance will be functional but all operations will be no-ops. + * + * @return a MetricsPersistence implementation for JDBC + */ + @Produces + @RequestScoped + @Identifier("relational-jdbc") + public MetricsPersistence metricsPersistence() { + try { + DatasourceOperations datasourceOperations = + new DatasourceOperations(dataSource.get(), relationalJdbcConfiguration); + + String realmId = realmContext.getRealmIdentifier(); + + int schemaVersion = + JdbcBasePersistenceImpl.loadSchemaVersion( + datasourceOperations, + realmConfig.getConfig(BehaviorChangeConfiguration.SCHEMA_VERSION_FALL_BACK_ON_DNE)); + + JdbcMetricsPersistence persistence = + new JdbcMetricsPersistence(datasourceOperations, realmId, schemaVersion); + + if (!persistence.supportsMetricsPersistence()) { + LOGGER.debug( + "Schema version {} does not support metrics tables. " + + "Metrics persistence operations will be no-ops.", + schemaVersion); + } + + return persistence; + } catch (SQLException e) { + LOGGER.warn( + "Failed to create JdbcMetricsPersistence due to {}. 
Returning NOOP implementation.", + e.getMessage()); + return MetricsPersistence.NOOP; + } + } +} diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 57f460a93e..72b43b838c 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -18,7 +18,6 @@ */ package org.apache.polaris.persistence.relational.jdbc; -import static org.apache.polaris.core.persistence.PrincipalSecretsGenerator.RANDOM_SECRETS; import static org.assertj.core.api.Assertions.assertThat; import java.io.InputStream; @@ -26,9 +25,6 @@ import java.util.Optional; import java.util.UUID; import javax.sql.DataSource; -import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; -import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelCommitMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ImmutableModelScanMetricsReport; import org.apache.polaris.persistence.relational.jdbc.models.ModelCommitMetricsReport; @@ -36,15 +32,14 @@ import org.h2.jdbcx.JdbcConnectionPool; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.mockito.Mockito; /** - * Integration tests for metrics report persistence using JdbcBasePersistenceImpl. Tests the full + * Integration tests for metrics report persistence using JdbcMetricsPersistence. Tests the full * flow of writing scan and commit metrics reports to the database. 
*/ class MetricsReportPersistenceTest { - private JdbcBasePersistenceImpl persistence; + private JdbcMetricsPersistence metricsPersistence; private DatasourceOperations datasourceOperations; @BeforeEach @@ -64,17 +59,7 @@ void setUp() throws SQLException { InputStream metricsSchemaStream = classLoader.getResourceAsStream("h2/schema-metrics-v1.sql"); datasourceOperations.executeScript(metricsSchemaStream); - PolarisDiagnostics diagServices = new PolarisDefaultDiagServiceImpl(); - RealmContext realmContext = () -> "TEST_REALM"; - - persistence = - new JdbcBasePersistenceImpl( - diagServices, - datasourceOperations, - RANDOM_SECRETS, - Mockito.mock(), - realmContext.getRealmIdentifier(), - 4); + metricsPersistence = new JdbcMetricsPersistence(datasourceOperations, "TEST_REALM", 4); } @Test @@ -114,7 +99,7 @@ void testWriteScanMetricsReport() { .build(); // Should not throw - persistence.writeScanMetricsReport(report); + metricsPersistence.writeScanMetricsReport(report); } @Test @@ -156,7 +141,7 @@ void testWriteCommitMetricsReport() { .build(); // Should not throw - persistence.writeCommitMetricsReport(report); + metricsPersistence.writeCommitMetricsReport(report); } @Test @@ -188,7 +173,7 @@ void testWriteMultipleScanReports() { .totalDeleteFileSizeBytes(0L) .build(); - persistence.writeScanMetricsReport(report); + metricsPersistence.writeScanMetricsReport(report); } } @@ -222,7 +207,7 @@ void testWriteReportWithNullOptionalFields() { .build(); // Should not throw even with null optional fields - persistence.writeScanMetricsReport(report); + metricsPersistence.writeScanMetricsReport(report); } @Test @@ -256,21 +241,22 @@ void testQueryScanMetricsReportsByTable() { .indexedDeleteFiles(0L) .totalDeleteFileSizeBytes(0L) .build(); - persistence.writeScanMetricsReport(report); + metricsPersistence.writeScanMetricsReport(report); } // Query all reports for the table - var results = persistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 10); + var results = 
metricsPersistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 10); assertThat(results).hasSize(5); // Query with time range var rangeResults = - persistence.queryScanMetricsReports( + metricsPersistence.queryScanMetricsReports( 12345L, 88888L, baseTime + 1000, baseTime + 4000, null, 10); assertThat(rangeResults).hasSize(3); // Query with limit - var limitedResults = persistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 2); + var limitedResults = + metricsPersistence.queryScanMetricsReports(12345L, 88888L, null, null, null, 2); assertThat(limitedResults).hasSize(2); } @@ -305,10 +291,10 @@ void testQueryScanMetricsReportsByTraceId() { .indexedDeleteFiles(0L) .totalDeleteFileSizeBytes(0L) .build(); - persistence.writeScanMetricsReport(report); + metricsPersistence.writeScanMetricsReport(report); // Query by trace ID - var results = persistence.queryScanMetricsReportsByTraceId(traceId); + var results = metricsPersistence.queryScanMetricsReportsByTraceId(traceId); assertThat(results).hasSize(1); assertThat(results.get(0).getOtelTraceId()).isEqualTo(traceId); } @@ -346,7 +332,7 @@ void testDeleteOldScanMetricsReports() { .indexedDeleteFiles(0L) .totalDeleteFileSizeBytes(0L) .build(); - persistence.writeScanMetricsReport(oldReport); + metricsPersistence.writeScanMetricsReport(oldReport); // Create a recent report (1 hour ago) ModelScanMetricsReport recentReport = @@ -374,17 +360,17 @@ void testDeleteOldScanMetricsReports() { .indexedDeleteFiles(0L) .totalDeleteFileSizeBytes(0L) .build(); - persistence.writeScanMetricsReport(recentReport); + metricsPersistence.writeScanMetricsReport(recentReport); // Delete reports older than 1 day long oneDayAgo = now - 24 * 3600_000; - int deleted = persistence.deleteScanMetricsReportsOlderThan(oneDayAgo); + int deleted = metricsPersistence.deleteScanMetricsReportsOlderThan(oneDayAgo); // Should have deleted the old report assertThat(deleted).isEqualTo(1); // Query to verify only recent report remains - 
var results = persistence.queryScanMetricsReports(11111L, 67890L, null, null, null, 10); + var results = metricsPersistence.queryScanMetricsReports(11111L, 67890L, null, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -427,7 +413,7 @@ void testDeleteOldCommitMetricsReports() { .totalDurationMs(50L) .attempts(1) .build(); - persistence.writeCommitMetricsReport(oldReport); + metricsPersistence.writeCommitMetricsReport(oldReport); // Create a recent report (1 hour ago) ModelCommitMetricsReport recentReport = @@ -460,17 +446,18 @@ void testDeleteOldCommitMetricsReports() { .totalDurationMs(30L) .attempts(1) .build(); - persistence.writeCommitMetricsReport(recentReport); + metricsPersistence.writeCommitMetricsReport(recentReport); // Delete reports older than 1 day long oneDayAgo = now - 24 * 3600_000; - int deleted = persistence.deleteCommitMetricsReportsOlderThan(oneDayAgo); + int deleted = metricsPersistence.deleteCommitMetricsReportsOlderThan(oneDayAgo); // Should have deleted the old report assertThat(deleted).isEqualTo(1); // Query to verify only recent report remains - var results = persistence.queryCommitMetricsReports(11111L, 67890L, null, null, null, 10); + var results = + metricsPersistence.queryCommitMetricsReports(11111L, 67890L, null, null, null, 10); assertThat(results).hasSize(1); assertThat(results.get(0).getReportId()).isEqualTo(recentReport.getReportId()); } @@ -480,24 +467,24 @@ void testDeleteOldCommitMetricsReports() { @Test void testSupportsMetricsPersistence_SchemaV4() { - assertThat(persistence.supportsMetricsPersistence()).isTrue(); + assertThat(metricsPersistence.supportsMetricsPersistence()).isTrue(); } @Test void testSupportsMetricsPersistence_SchemaV3() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); 
assertThat(v3Persistence.supportsMetricsPersistence()).isFalse(); } @Test void testSupportsMetricsPersistence_SchemaV1() { - JdbcBasePersistenceImpl v1Persistence = createPersistenceWithSchemaVersion(1); + JdbcMetricsPersistence v1Persistence = createMetricsPersistenceWithSchemaVersion(1); assertThat(v1Persistence.supportsMetricsPersistence()).isFalse(); } @Test void testWriteScanMetricsReport_OlderSchema_IsNoOp() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); ModelScanMetricsReport report = ImmutableModelScanMetricsReport.builder() @@ -531,7 +518,7 @@ void testWriteScanMetricsReport_OlderSchema_IsNoOp() { @Test void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); ModelCommitMetricsReport report = ImmutableModelCommitMetricsReport.builder() @@ -569,7 +556,7 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { @Test void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); var results = v3Persistence.queryScanMetricsReports(12345L, 67890L, null, null, null, 10); @@ -578,7 +565,7 @@ void testQueryScanMetricsReports_OlderSchema_ReturnsEmptyList() { @Test void testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); var results = v3Persistence.queryCommitMetricsReports(12345L, 67890L, null, null, null, 10); @@ -587,7 +574,7 @@ void testQueryCommitMetricsReports_OlderSchema_ReturnsEmptyList() { @Test void 
testQueryScanMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); var results = v3Persistence.queryScanMetricsReportsByTraceId("trace-123"); @@ -596,7 +583,7 @@ void testQueryScanMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { @Test void testQueryCommitMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); var results = v3Persistence.queryCommitMetricsReportsByTraceId("trace-123"); @@ -605,7 +592,7 @@ void testQueryCommitMetricsReportsByTraceId_OlderSchema_ReturnsEmptyList() { @Test void testDeleteScanMetricsReportsOlderThan_OlderSchema_ReturnsZero() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); int deleted = v3Persistence.deleteScanMetricsReportsOlderThan(System.currentTimeMillis()); @@ -614,7 +601,7 @@ void testDeleteScanMetricsReportsOlderThan_OlderSchema_ReturnsZero() { @Test void testDeleteCommitMetricsReportsOlderThan_OlderSchema_ReturnsZero() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); int deleted = v3Persistence.deleteCommitMetricsReportsOlderThan(System.currentTimeMillis()); @@ -623,7 +610,7 @@ void testDeleteCommitMetricsReportsOlderThan_OlderSchema_ReturnsZero() { @Test void testDeleteAllMetricsReportsOlderThan_OlderSchema_ReturnsZero() { - JdbcBasePersistenceImpl v3Persistence = createPersistenceWithSchemaVersion(3); + JdbcMetricsPersistence v3Persistence = createMetricsPersistenceWithSchemaVersion(3); int deleted = 
v3Persistence.deleteAllMetricsReportsOlderThan(System.currentTimeMillis()); @@ -631,18 +618,11 @@ void testDeleteAllMetricsReportsOlderThan_OlderSchema_ReturnsZero() { } /** - * Creates a JdbcBasePersistenceImpl with the specified schema version. This uses the same + * Creates a JdbcMetricsPersistence with the specified schema version. This uses the same * datasource but with a different reported schema version to test graceful degradation. */ - private JdbcBasePersistenceImpl createPersistenceWithSchemaVersion(int schemaVersion) { - PolarisDiagnostics diagServices = new PolarisDefaultDiagServiceImpl(); - return new JdbcBasePersistenceImpl( - diagServices, - datasourceOperations, - RANDOM_SECRETS, - Mockito.mock(), - "TEST_REALM", - schemaVersion); + private JdbcMetricsPersistence createMetricsPersistenceWithSchemaVersion(int schemaVersion) { + return new JdbcMetricsPersistence(datasourceOperations, "TEST_REALM", schemaVersion); } private static class TestJdbcConfiguration implements RelationalJdbcConfiguration { diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 62e7a3aedc..423a36a17b 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -226,16 +226,25 @@ public PolarisMetaStoreManager polarisMetaStoreManager( } /** - * Produces a {@link MetricsPersistence} bean for the current request. The default implementation - * returns a no-op instance. Persistence backends that support metrics storage (e.g., JDBC with - * metrics schema) should provide an alternative producer that returns a functional - * implementation. + * Produces a {@link MetricsPersistence} bean for the current request. * + *

      This method selects a MetricsPersistence implementation based on the configured persistence + * type. If a backend-specific implementation is available (e.g., JDBC with metrics schema), it + * will be used. Otherwise, falls back to the no-op implementation. + * + * @param config the persistence configuration + * @param metricsPersistenceImpls all available MetricsPersistence implementations * @return a MetricsPersistence implementation for the current realm */ @Produces @RequestScoped - public MetricsPersistence metricsPersistence() { + public MetricsPersistence metricsPersistence( + PersistenceConfiguration config, @Any Instance metricsPersistenceImpls) { + Instance selected = + metricsPersistenceImpls.select(Identifier.Literal.of(config.type())); + if (selected.isResolvable()) { + return selected.get(); + } return MetricsPersistence.NOOP; } From ebb5bc859d939698e5aafc64a5d5f9bba9273fa1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 11:01:26 -0800 Subject: [PATCH 63/67] refactor: Remove namespace from JDBC metrics implementation Address PR review comment - remove namespace since tableId uniquely identifies the table. Namespace can be derived from the table entity if needed. 
- Remove namespace column from H2 and Postgres schema-metrics-v1.sql - Remove namespace from Model classes (ModelScanMetricsReport, ModelCommitMetricsReport) - Remove namespace from converter classes - Update SpiModelConverter to not use namespace - Update PersistingMetricsReporter to not pass namespace - Update all related tests --- .../relational/jdbc/SpiModelConverter.java | 31 ------------ .../jdbc/models/MetricsReportConverter.java | 6 --- .../jdbc/models/ModelCommitMetricsReport.java | 7 --- .../ModelCommitMetricsReportConverter.java | 1 - .../jdbc/models/ModelScanMetricsReport.java | 7 --- .../ModelScanMetricsReportConverter.java | 1 - .../main/resources/h2/schema-metrics-v1.sql | 2 - .../resources/postgres/schema-metrics-v1.sql | 2 - .../jdbc/MetricsReportPersistenceTest.java | 12 ----- .../jdbc/SpiModelConverterTest.java | 49 ------------------- .../models/ModelCommitMetricsReportTest.java | 5 -- .../models/ModelScanMetricsReportTest.java | 6 --- .../reporting/PersistingMetricsReporter.java | 4 -- .../PersistingMetricsReporterTest.java | 2 - 14 files changed, 135 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java index 90982b2f83..11999b33e2 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverter.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import java.time.Instant; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -46,7 +45,6 @@ * *

        *
      • catalogId: long (SPI) ↔ long (Model) - *
      • namespace: List<String> (SPI) ↔ dot-separated string (Model) *
      • timestamp: Instant (SPI) ↔ long milliseconds (Model) *
      • metadata: Map<String, String> (SPI) ↔ JSON string (Model) *
      • projectedFieldIds/Names: List (SPI) ↔ comma-separated string (Model) @@ -72,7 +70,6 @@ public static ModelScanMetricsReport toModelScanReport(ScanMetricsRecord record, .reportId(record.reportId()) .realmId(realmId) .catalogId(record.catalogId()) - .namespace(toNamespaceJson(record.namespace())) .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId().orElse(null)) @@ -113,7 +110,6 @@ public static ModelCommitMetricsReport toModelCommitReport( .reportId(record.reportId()) .realmId(realmId) .catalogId(record.catalogId()) - .namespace(toNamespaceJson(record.namespace())) .tableId(record.tableId()) .timestampMs(record.timestamp().toEpochMilli()) .snapshotId(record.snapshotId()) @@ -151,7 +147,6 @@ public static ScanMetricsRecord toScanMetricsRecord(ModelScanMetricsReport model return ScanMetricsRecord.builder() .reportId(model.getReportId()) .catalogId(model.getCatalogId()) - .namespace(parseNamespace(model.getNamespace())) .tableId(model.getTableId()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(Optional.ofNullable(model.getSnapshotId())) @@ -189,7 +184,6 @@ public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport return CommitMetricsRecord.builder() .reportId(model.getReportId()) .catalogId(model.getCatalogId()) - .namespace(parseNamespace(model.getNamespace())) .tableId(model.getTableId()) .timestamp(Instant.ofEpochMilli(model.getTimestampMs())) .snapshotId(model.getSnapshotId()) @@ -222,20 +216,6 @@ public static CommitMetricsRecord toCommitMetricsRecord(ModelCommitMetricsReport // === Helper Methods === - private static List parseNamespace(String namespace) { - if (namespace == null || namespace.isEmpty()) { - return Collections.emptyList(); - } - // Namespace is stored as a JSON array to preserve segment boundaries - // (namespace levels may contain dots) - try { - return OBJECT_MAPPER.readValue(namespace, new TypeReference>() {}); - } catch 
(JsonProcessingException e) { - // Fallback for any legacy dot-separated data - return Arrays.asList(namespace.split("\\.")); - } - } - private static String toCommaSeparated(List list) { if (list == null || list.isEmpty()) { return null; @@ -264,17 +244,6 @@ private static List parseStringList(String commaSeparated) { .collect(Collectors.toList()); } - private static String toNamespaceJson(List namespace) { - if (namespace == null || namespace.isEmpty()) { - return ""; - } - try { - return OBJECT_MAPPER.writeValueAsString(namespace); - } catch (JsonProcessingException e) { - return ""; - } - } - private static String toJsonString(Map map) { if (map == null || map.isEmpty()) { return "{}"; diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java index d3fe29e4ad..4bd841f0a0 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsReportConverter.java @@ -50,7 +50,6 @@ private MetricsReportConverter() { * @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID * @param tableId the table entity ID - * @param namespace the namespace (dot-separated) * @param principalName the principal who initiated the scan (optional) * @param requestId the request ID (optional) * @param otelTraceId OpenTelemetry trace ID (optional) @@ -62,7 +61,6 @@ public static ModelScanMetricsReport fromScanReport( String realmId, long catalogId, long tableId, - String namespace, @Nullable String principalName, @Nullable String requestId, @Nullable String otelTraceId, @@ -78,7 +76,6 @@ public static ModelScanMetricsReport fromScanReport( .reportId(reportId) .realmId(realmId) 
.catalogId(catalogId) - .namespace(namespace) .tableId(tableId) .timestampMs(timestampMs) .principalName(principalName) @@ -146,7 +143,6 @@ public static ModelScanMetricsReport fromScanReport( * @param realmId the realm ID for multi-tenancy * @param catalogId the catalog ID * @param tableId the table entity ID - * @param namespace the namespace (dot-separated) * @param principalName the principal who initiated the commit (optional) * @param requestId the request ID (optional) * @param otelTraceId OpenTelemetry trace ID (optional) @@ -158,7 +154,6 @@ public static ModelCommitMetricsReport fromCommitReport( String realmId, long catalogId, long tableId, - String namespace, @Nullable String principalName, @Nullable String requestId, @Nullable String otelTraceId, @@ -174,7 +169,6 @@ public static ModelCommitMetricsReport fromCommitReport( .reportId(reportId) .realmId(realmId) .catalogId(catalogId) - .namespace(namespace) .tableId(tableId) .timestampMs(timestampMs) .principalName(principalName) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java index e181d702d0..3bb6de016d 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReport.java @@ -36,7 +36,6 @@ public interface ModelCommitMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REPORT_ID, getReportId()); map.put(REALM_ID, getRealmId()); map.put(CATALOG_ID, getCatalogId()); - map.put(NAMESPACE, getNamespace()); map.put(TABLE_ID_COL, getTableId()); map.put(TIMESTAMP_MS, getTimestampMs()); map.put(PRINCIPAL_NAME, getPrincipalName()); @@ -266,7 +260,6 @@ default Map 
toMap(DatabaseType databaseType) { .reportId("") .realmId("") .catalogId(0L) - .namespace("") .tableId(0L) .timestampMs(0L) .snapshotId(0L) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java index b683edd0e3..9bb8527f8d 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportConverter.java @@ -35,7 +35,6 @@ public ModelCommitMetricsReport fromResultSet(ResultSet rs) throws SQLException .reportId(rs.getString(ModelCommitMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelCommitMetricsReport.REALM_ID)) .catalogId(rs.getLong(ModelCommitMetricsReport.CATALOG_ID)) - .namespace(rs.getString(ModelCommitMetricsReport.NAMESPACE)) .tableId(rs.getLong(ModelCommitMetricsReport.TABLE_ID_COL)) .timestampMs(rs.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) .principalName(rs.getString(ModelCommitMetricsReport.PRINCIPAL_NAME)) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java index c81a70f5b4..91dc88905c 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReport.java @@ -36,7 +36,6 @@ public interface ModelScanMetricsReport extends Converter toMap(DatabaseType databaseType) { map.put(REPORT_ID, getReportId()); map.put(REALM_ID, 
getRealmId()); map.put(CATALOG_ID, getCatalogId()); - map.put(NAMESPACE, getNamespace()); map.put(TABLE_ID_COL, getTableId()); map.put(TIMESTAMP_MS, getTimestampMs()); map.put(PRINCIPAL_NAME, getPrincipalName()); @@ -270,7 +264,6 @@ default Map toMap(DatabaseType databaseType) { .reportId("") .realmId("") .catalogId(0L) - .namespace("") .tableId(0L) .timestampMs(0L) .resultDataFiles(0L) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java index 1abbc0389c..ab7c8f4e60 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportConverter.java @@ -35,7 +35,6 @@ public ModelScanMetricsReport fromResultSet(ResultSet rs) throws SQLException { .reportId(rs.getString(ModelScanMetricsReport.REPORT_ID)) .realmId(rs.getString(ModelScanMetricsReport.REALM_ID)) .catalogId(rs.getLong(ModelScanMetricsReport.CATALOG_ID)) - .namespace(rs.getString(ModelScanMetricsReport.NAMESPACE)) .tableId(rs.getLong(ModelScanMetricsReport.TABLE_ID_COL)) .timestampMs(rs.getLong(ModelScanMetricsReport.TIMESTAMP_MS)) .principalName(rs.getString(ModelScanMetricsReport.PRINCIPAL_NAME)) diff --git a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql index 6cc7649723..8acf84c398 100644 --- a/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/h2/schema-metrics-v1.sql @@ -52,7 +52,6 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, 
- namespace TEXT NOT NULL, table_id BIGINT NOT NULL, -- Report metadata @@ -111,7 +110,6 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, table_id BIGINT NOT NULL, -- Report metadata diff --git a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql index 4207f949da..4725974b79 100644 --- a/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql +++ b/persistence/relational-jdbc/src/main/resources/postgres/schema-metrics-v1.sql @@ -52,7 +52,6 @@ CREATE TABLE IF NOT EXISTS scan_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, table_id BIGINT NOT NULL, -- Report metadata @@ -117,7 +116,6 @@ CREATE TABLE IF NOT EXISTS commit_metrics_report ( report_id TEXT NOT NULL, realm_id TEXT NOT NULL, catalog_id BIGINT NOT NULL, - namespace TEXT NOT NULL, table_id BIGINT NOT NULL, -- Report metadata diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java index 72b43b838c..d78d44f1cb 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/MetricsReportPersistenceTest.java @@ -69,7 +69,6 @@ void testWriteScanMetricsReport() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) @@ -109,7 +108,6 @@ void testWriteCommitMetricsReport() { .reportId(UUID.randomUUID().toString()) 
.realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) @@ -152,7 +150,6 @@ void testWriteMultipleScanReports() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(100L + i) .timestampMs(System.currentTimeMillis()) .resultDataFiles((long) (i * 10)) @@ -184,7 +181,6 @@ void testWriteReportWithNullOptionalFields() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(99999L) .timestampMs(System.currentTimeMillis()) // All optional fields left as null @@ -221,7 +217,6 @@ void testQueryScanMetricsReportsByTable() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db.schema") .tableId(88888L) .timestampMs(baseTime + i * 1000) .resultDataFiles((long) i) @@ -270,7 +265,6 @@ void testQueryScanMetricsReportsByTraceId() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(77777L) .timestampMs(System.currentTimeMillis()) .otelTraceId(traceId) @@ -312,7 +306,6 @@ void testDeleteOldScanMetricsReports() { .reportId("old-report-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(twoDaysAgo) .resultDataFiles(10L) @@ -340,7 +333,6 @@ void testDeleteOldScanMetricsReports() { .reportId("recent-report-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(oneHourAgo) .resultDataFiles(10L) @@ -388,7 +380,6 @@ void testDeleteOldCommitMetricsReports() { .reportId("old-commit-" + UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(twoDaysAgo) .snapshotId(100L) @@ -421,7 +412,6 @@ void testDeleteOldCommitMetricsReports() { .reportId("recent-commit-" + 
UUID.randomUUID()) .realmId("TEST_REALM") .catalogId(11111L) - .namespace("test_namespace") .tableId(67890L) .timestampMs(oneHourAgo) .snapshotId(101L) @@ -491,7 +481,6 @@ void testWriteScanMetricsReport_OlderSchema_IsNoOp() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(67890L) .timestampMs(System.currentTimeMillis()) .resultDataFiles(1L) @@ -525,7 +514,6 @@ void testWriteCommitMetricsReport_OlderSchema_IsNoOp() { .reportId(UUID.randomUUID().toString()) .realmId("TEST_REALM") .catalogId(12345L) - .namespace("db") .tableId(67890L) .timestampMs(System.currentTimeMillis()) .snapshotId(12345L) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java index d29f8a3851..c283209b72 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/SpiModelConverterTest.java @@ -38,9 +38,6 @@ public class SpiModelConverterTest { private static final String TEST_REPORT_ID = "report-123"; private static final String TEST_REALM_ID = "realm-1"; private static final long TEST_CATALOG_ID = 12345L; - private static final List TEST_NAMESPACE = List.of("db", "schema"); - // Namespace is stored as JSON array - private static final String TEST_NAMESPACE_STR = "[\"db\",\"schema\"]"; private static final long TEST_TABLE_ID = 67890L; private static final Instant TEST_TIMESTAMP = Instant.ofEpochMilli(1704067200000L); private static final long TEST_TIMESTAMP_MS = 1704067200000L; @@ -56,7 +53,6 @@ void testToModelScanReport() { assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); 
assertThat(model.getCatalogId()).isEqualTo(TEST_CATALOG_ID); - assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE_STR); assertThat(model.getTableId()).isEqualTo(TEST_TABLE_ID); assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); assertThat(model.getSnapshotId()).isEqualTo(123456789L); @@ -78,7 +74,6 @@ void testToScanMetricsRecord() { assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); - assertThat(record.namespace()).isEqualTo(TEST_NAMESPACE); assertThat(record.tableId()).isEqualTo(TEST_TABLE_ID); assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); assertThat(record.snapshotId()).isEqualTo(Optional.of(123456789L)); @@ -99,7 +94,6 @@ void testScanRecordRoundTrip() { assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); - assertThat(roundTripped.namespace()).isEqualTo(original.namespace()); assertThat(roundTripped.tableId()).isEqualTo(original.tableId()); assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); assertThat(roundTripped.resultDataFiles()).isEqualTo(original.resultDataFiles()); @@ -116,7 +110,6 @@ void testToModelCommitReport() { assertThat(model.getReportId()).isEqualTo(TEST_REPORT_ID); assertThat(model.getRealmId()).isEqualTo(TEST_REALM_ID); assertThat(model.getCatalogId()).isEqualTo(TEST_CATALOG_ID); - assertThat(model.getNamespace()).isEqualTo(TEST_NAMESPACE_STR); assertThat(model.getTableId()).isEqualTo(TEST_TABLE_ID); assertThat(model.getTimestampMs()).isEqualTo(TEST_TIMESTAMP_MS); assertThat(model.getSnapshotId()).isEqualTo(987654321L); @@ -136,7 +129,6 @@ void testToCommitMetricsRecord() { assertThat(record.reportId()).isEqualTo(TEST_REPORT_ID); assertThat(record.catalogId()).isEqualTo(TEST_CATALOG_ID); - assertThat(record.namespace()).isEqualTo(TEST_NAMESPACE); assertThat(record.tableId()).isEqualTo(TEST_TABLE_ID); 
assertThat(record.timestamp()).isEqualTo(TEST_TIMESTAMP); assertThat(record.snapshotId()).isEqualTo(987654321L); @@ -155,7 +147,6 @@ void testCommitRecordRoundTrip() { assertThat(roundTripped.reportId()).isEqualTo(original.reportId()); assertThat(roundTripped.catalogId()).isEqualTo(original.catalogId()); - assertThat(roundTripped.namespace()).isEqualTo(original.namespace()); assertThat(roundTripped.tableId()).isEqualTo(original.tableId()); assertThat(roundTripped.timestamp()).isEqualTo(original.timestamp()); assertThat(roundTripped.snapshotId()).isEqualTo(original.snapshotId()); @@ -164,47 +155,12 @@ void testCommitRecordRoundTrip() { // === Edge Cases === - @Test - void testEmptyNamespace() { - ScanMetricsRecord record = - ScanMetricsRecord.builder() - .reportId(TEST_REPORT_ID) - .catalogId(TEST_CATALOG_ID) - .namespace(List.of()) - .tableId(TEST_TABLE_ID) - .timestamp(TEST_TIMESTAMP) - .resultDataFiles(0L) - .resultDeleteFiles(0L) - .totalFileSizeBytes(0L) - .totalDataManifests(0L) - .totalDeleteManifests(0L) - .scannedDataManifests(0L) - .scannedDeleteManifests(0L) - .skippedDataManifests(0L) - .skippedDeleteManifests(0L) - .skippedDataFiles(0L) - .skippedDeleteFiles(0L) - .totalPlanningDurationMs(0L) - .equalityDeleteFiles(0L) - .positionalDeleteFiles(0L) - .indexedDeleteFiles(0L) - .totalDeleteFileSizeBytes(0L) - .build(); - - ModelScanMetricsReport model = SpiModelConverter.toModelScanReport(record, TEST_REALM_ID); - assertThat(model.getNamespace()).isEmpty(); - - ScanMetricsRecord roundTripped = SpiModelConverter.toScanMetricsRecord(model); - assertThat(roundTripped.namespace()).isEmpty(); - } - @Test void testNullOptionalFields() { ScanMetricsRecord record = ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .namespace(List.of("db")) .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .resultDataFiles(0L) @@ -239,7 +195,6 @@ void testEmptyMetadata() { ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) 
.catalogId(TEST_CATALOG_ID) - .namespace(List.of("db")) .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .resultDataFiles(0L) @@ -270,7 +225,6 @@ private ScanMetricsRecord createTestScanRecord() { return ScanMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE) .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .snapshotId(123456789L) @@ -303,7 +257,6 @@ private ModelScanMetricsReport createTestModelScanReport() { .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE_STR) .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .snapshotId(123456789L) @@ -335,7 +288,6 @@ private CommitMetricsRecord createTestCommitRecord() { return CommitMetricsRecord.builder() .reportId(TEST_REPORT_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE) .tableId(TEST_TABLE_ID) .timestamp(TEST_TIMESTAMP) .snapshotId(987654321L) @@ -368,7 +320,6 @@ private ModelCommitMetricsReport createTestModelCommitReport() { .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE_STR) .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .snapshotId(987654321L) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java index 557ee6a9c9..514e28f902 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelCommitMetricsReportTest.java @@ -34,7 +34,6 @@ public class ModelCommitMetricsReportTest { private static final String TEST_REPORT_ID = "commit-report-123"; private static final String TEST_REALM_ID = "realm-1"; private static final 
long TEST_CATALOG_ID = 12345L; - private static final String TEST_NAMESPACE = "db.schema"; private static final long TEST_TABLE_ID = 67890L; private static final long TEST_TIMESTAMP_MS = 1704067200000L; private static final String TEST_PRINCIPAL = "user@example.com"; @@ -71,7 +70,6 @@ public void testFromResultSet() throws SQLException { when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); when(mockResultSet.getString(ModelCommitMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); when(mockResultSet.getLong(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); when(mockResultSet.getLong(ModelCommitMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) .thenReturn(TEST_TIMESTAMP_MS); @@ -131,7 +129,6 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertEquals(TEST_NAMESPACE, result.getNamespace()); assertEquals(TEST_TABLE_ID, result.getTableId()); assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); assertEquals(TEST_SNAPSHOT_ID, result.getSnapshotId()); @@ -178,7 +175,6 @@ public void testConverterFromResultSet() throws SQLException { when(mockResultSet.getString(ModelCommitMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); when(mockResultSet.getString(ModelCommitMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); when(mockResultSet.getLong(ModelCommitMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelCommitMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); when(mockResultSet.getLong(ModelCommitMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelCommitMetricsReport.TIMESTAMP_MS)) 
.thenReturn(TEST_TIMESTAMP_MS); @@ -247,7 +243,6 @@ private ModelCommitMetricsReport createTestReport() { .reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE) .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .principalName(TEST_PRINCIPAL) diff --git a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java index d2186297c7..fc8e43b270 100644 --- a/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java +++ b/persistence/relational-jdbc/src/test/java/org/apache/polaris/persistence/relational/jdbc/models/ModelScanMetricsReportTest.java @@ -34,7 +34,6 @@ public class ModelScanMetricsReportTest { private static final String TEST_REPORT_ID = "report-123"; private static final String TEST_REALM_ID = "realm-1"; private static final long TEST_CATALOG_ID = 12345L; - private static final String TEST_NAMESPACE = "db.schema"; private static final long TEST_TABLE_ID = 67890L; private static final long TEST_TIMESTAMP_MS = 1704067200000L; private static final String TEST_PRINCIPAL = "user@example.com"; @@ -71,7 +70,6 @@ public void testFromResultSet() throws SQLException { when(mockResultSet.getString(ModelScanMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); when(mockResultSet.getString(ModelScanMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); when(mockResultSet.getLong(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelScanMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); when(mockResultSet.getLong(ModelScanMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); 
when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); @@ -130,7 +128,6 @@ public void testFromResultSet() throws SQLException { assertEquals(TEST_REPORT_ID, result.getReportId()); assertEquals(TEST_REALM_ID, result.getRealmId()); assertEquals(TEST_CATALOG_ID, result.getCatalogId()); - assertEquals(TEST_NAMESPACE, result.getNamespace()); assertEquals(TEST_TABLE_ID, result.getTableId()); assertEquals(TEST_TIMESTAMP_MS, result.getTimestampMs()); assertEquals(TEST_PRINCIPAL, result.getPrincipalName()); @@ -152,7 +149,6 @@ public void testToMapWithH2DatabaseType() { assertEquals(TEST_REPORT_ID, resultMap.get(ModelScanMetricsReport.REPORT_ID)); assertEquals(TEST_REALM_ID, resultMap.get(ModelScanMetricsReport.REALM_ID)); assertEquals(TEST_CATALOG_ID, resultMap.get(ModelScanMetricsReport.CATALOG_ID)); - assertEquals(TEST_NAMESPACE, resultMap.get(ModelScanMetricsReport.NAMESPACE)); assertEquals(TEST_TABLE_ID, resultMap.get(ModelScanMetricsReport.TABLE_ID_COL)); assertEquals(TEST_TIMESTAMP_MS, resultMap.get(ModelScanMetricsReport.TIMESTAMP_MS)); assertEquals(TEST_RESULT_DATA_FILES, resultMap.get(ModelScanMetricsReport.RESULT_DATA_FILES)); @@ -180,7 +176,6 @@ public void testConverterFromResultSet() throws SQLException { when(mockResultSet.getString(ModelScanMetricsReport.REPORT_ID)).thenReturn(TEST_REPORT_ID); when(mockResultSet.getString(ModelScanMetricsReport.REALM_ID)).thenReturn(TEST_REALM_ID); when(mockResultSet.getLong(ModelScanMetricsReport.CATALOG_ID)).thenReturn(TEST_CATALOG_ID); - when(mockResultSet.getString(ModelScanMetricsReport.NAMESPACE)).thenReturn(TEST_NAMESPACE); when(mockResultSet.getLong(ModelScanMetricsReport.TABLE_ID_COL)).thenReturn(TEST_TABLE_ID); when(mockResultSet.getLong(ModelScanMetricsReport.TIMESTAMP_MS)).thenReturn(TEST_TIMESTAMP_MS); when(mockResultSet.getString(ModelScanMetricsReport.PRINCIPAL_NAME)).thenReturn(TEST_PRINCIPAL); @@ -247,7 +242,6 @@ private ModelScanMetricsReport createTestReport() { 
.reportId(TEST_REPORT_ID) .realmId(TEST_REALM_ID) .catalogId(TEST_CATALOG_ID) - .namespace(TEST_NAMESPACE) .tableId(TEST_TABLE_ID) .timestampMs(TEST_TIMESTAMP_MS) .principalName(TEST_PRINCIPAL) diff --git a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java index 9ed8fa1c52..11cebacc28 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/reporting/PersistingMetricsReporter.java @@ -23,7 +23,6 @@ import jakarta.inject.Inject; import java.time.Instant; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitReport; @@ -148,14 +147,12 @@ public void reportMetric( } long tableId = tableResult.getEntity().getId(); - List namespace = Arrays.asList(namespaceLevels); if (metricsReport instanceof ScanReport scanReport) { ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) .catalogId(catalogId) .tableId(tableId) - .namespace(namespace) .timestamp(receivedTimestamp) .build(); metricsPersistence.writeScanReport(record); @@ -166,7 +163,6 @@ public void reportMetric( MetricsRecordConverter.forCommitReport(commitReport) .catalogId(catalogId) .tableId(tableId) - .namespace(namespace) .timestamp(receivedTimestamp) .build(); metricsPersistence.writeCommitReport(record); diff --git a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java index c017f9284f..e33500c4bd 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java +++ 
b/runtime/service/src/test/java/org/apache/polaris/service/reporting/PersistingMetricsReporterTest.java @@ -139,7 +139,6 @@ void testReportScanMetrics() { ScanMetricsRecord record = captor.getValue(); assertThat(record.catalogId()).isEqualTo(CATALOG_ID); assertThat(record.tableId()).isEqualTo(TABLE_ID); - assertThat(record.namespace()).isEqualTo(NAMESPACE); assertThat(record.reportId()).isNotNull(); } @@ -197,7 +196,6 @@ void testReportCommitMetrics() { CommitMetricsRecord record = captor.getValue(); assertThat(record.catalogId()).isEqualTo(CATALOG_ID); assertThat(record.tableId()).isEqualTo(TABLE_ID); - assertThat(record.namespace()).isEqualTo(NAMESPACE); assertThat(record.reportId()).isNotNull(); } From b2396da37ebc6e972501a411b767d177bdf12f92 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 11:32:22 -0800 Subject: [PATCH 64/67] Fix failing tests --- .../relational/jdbc/JdbcMetricsPersistenceProducer.java | 8 +++----- .../polaris/admintool/config/AdminToolProducers.java | 8 ++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java index f566e65c9a..17bf816b02 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java @@ -51,10 +51,6 @@ public class JdbcMetricsPersistenceProducer { @Inject RelationalJdbcConfiguration relationalJdbcConfiguration; - @Inject RealmContext realmContext; - - @Inject RealmConfig realmConfig; - /** * Produces a {@link MetricsPersistence} instance for the current request. 
* @@ -62,12 +58,14 @@ public class JdbcMetricsPersistenceProducer { * and schema version. If the schema version is less than 4 (which includes metrics tables), the * returned instance will be functional but all operations will be no-ops. * + * @param realmContext the current realm context (request-scoped) + * @param realmConfig the realm configuration (request-scoped) * @return a MetricsPersistence implementation for JDBC */ @Produces @RequestScoped @Identifier("relational-jdbc") - public MetricsPersistence metricsPersistence() { + public MetricsPersistence metricsPersistence(RealmContext realmContext, RealmConfig realmConfig) { try { DatasourceOperations datasourceOperations = new DatasourceOperations(dataSource.get(), relationalJdbcConfiguration); diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java index 236325a588..475292b7ce 100644 --- a/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java @@ -31,6 +31,7 @@ import org.apache.polaris.core.config.PolarisConfigurationStore; import org.apache.polaris.core.config.RealmConfig; import org.apache.polaris.core.config.RealmConfigImpl; +import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.persistence.MetaStoreManagerFactory; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.PolarisStorageIntegration; @@ -85,4 +86,11 @@ public RealmConfig dummyRealmConfig(PolarisConfigurationStore configurationStore String absentId = UUID.randomUUID().toString(); return new RealmConfigImpl(configurationStore, () -> absentId); } + + @Produces + public RealmContext dummyRealmContext() { + // A dummy RealmContext for the admin tool - required by JdbcMetricsPersistenceProducer + // but not 
actually used since the admin tool doesn't persist metrics. + return () -> "admin-tool-realm"; + } } From 23288240d4f61a3f0ed502281ae397558339efcd Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 12:49:42 -0800 Subject: [PATCH 65/67] refactor: Address PR review comments for MetricsPersistence - Create separate MetricsPersistenceConfiguration with polaris.persistence.metrics prefix and 'noop' default value (addresses r2775695727) - Add NoopMetricsPersistence CDI bean with @Identifier("noop") annotation - Update ServiceProducers.metricsPersistence() to error if type not resolvable instead of falling back to NOOP (addresses r2775688321) - Refactor AdminToolProducers: move UUID to dummyRealmContext() and have dummyRealmConfig() take RealmContext as parameter (addresses r2775725538) --- .../admintool/config/AdminToolProducers.java | 15 ++-- .../service/config/ServiceProducers.java | 19 +++-- .../MetricsPersistenceConfiguration.java | 47 +++++++++++++ .../persistence/NoopMetricsPersistence.java | 70 +++++++++++++++++++ 4 files changed, 133 insertions(+), 18 deletions(-) create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/persistence/MetricsPersistenceConfiguration.java create mode 100644 runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java index 475292b7ce..df64236eec 100644 --- a/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/config/AdminToolProducers.java @@ -81,16 +81,17 @@ public PolarisConfigurationStore configurationStore() { } @Produces - public RealmConfig dummyRealmConfig(PolarisConfigurationStore configurationStore) { - // Use a random realm ID for RealmConfig since the 
PolarisConfigurationStore is empty anyway + public RealmContext dummyRealmContext() { + // Use UUID to protect against accidental realm ID collisions. + // This is a dummy RealmContext for the admin tool - required by JdbcMetricsPersistenceProducer + // but not actually used since the admin tool doesn't persist metrics. String absentId = UUID.randomUUID().toString(); - return new RealmConfigImpl(configurationStore, () -> absentId); + return () -> absentId; } @Produces - public RealmContext dummyRealmContext() { - // A dummy RealmContext for the admin tool - required by JdbcMetricsPersistenceProducer - // but not actually used since the admin tool doesn't persist metrics. - return () -> "admin-tool-realm"; + public RealmConfig dummyRealmConfig( + PolarisConfigurationStore configurationStore, RealmContext realmContext) { + return new RealmConfigImpl(configurationStore, realmContext); } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 423a36a17b..8f382da8c5 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -75,6 +75,7 @@ import org.apache.polaris.service.credentials.PolarisCredentialManagerConfiguration; import org.apache.polaris.service.events.PolarisEventListenerConfiguration; import org.apache.polaris.service.events.listeners.PolarisEventListener; +import org.apache.polaris.service.persistence.MetricsPersistenceConfiguration; import org.apache.polaris.service.persistence.PersistenceConfiguration; import org.apache.polaris.service.ratelimiter.RateLimiter; import org.apache.polaris.service.ratelimiter.RateLimiterFilterConfiguration; @@ -228,24 +229,20 @@ public PolarisMetaStoreManager polarisMetaStoreManager( /** * Produces a {@link MetricsPersistence} bean for the current request. 
* - *

        This method selects a MetricsPersistence implementation based on the configured persistence - * type. If a backend-specific implementation is available (e.g., JDBC with metrics schema), it - * will be used. Otherwise, falls back to the no-op implementation. + *

        This method selects a MetricsPersistence implementation based on the configured metrics + * persistence type. The type is configured independently from the entity metastore via {@code + * polaris.persistence.metrics.type}. * - * @param config the persistence configuration + * @param config the metrics persistence configuration * @param metricsPersistenceImpls all available MetricsPersistence implementations * @return a MetricsPersistence implementation for the current realm */ @Produces @RequestScoped public MetricsPersistence metricsPersistence( - PersistenceConfiguration config, @Any Instance metricsPersistenceImpls) { - Instance selected = - metricsPersistenceImpls.select(Identifier.Literal.of(config.type())); - if (selected.isResolvable()) { - return selected.get(); - } - return MetricsPersistence.NOOP; + MetricsPersistenceConfiguration config, + @Any Instance metricsPersistenceImpls) { + return metricsPersistenceImpls.select(Identifier.Literal.of(config.type())).get(); } @Produces diff --git a/runtime/service/src/main/java/org/apache/polaris/service/persistence/MetricsPersistenceConfiguration.java b/runtime/service/src/main/java/org/apache/polaris/service/persistence/MetricsPersistenceConfiguration.java new file mode 100644 index 0000000000..9e990553bd --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/persistence/MetricsPersistenceConfiguration.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.service.persistence; + +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; + +/** + * Configuration for selecting the {@link + * org.apache.polaris.core.persistence.metrics.MetricsPersistence} implementation. + * + *

        This configuration allows selecting the metrics persistence backend independently from the + * entity metastore. Available types include: + * + *

          + *
        • {@code noop} (default) - No persistence, metrics are discarded + *
        • {@code relational-jdbc} - Persists metrics to the JDBC database (requires metrics schema) + *
        + */ +@ConfigMapping(prefix = "polaris.persistence.metrics") +public interface MetricsPersistenceConfiguration { + + /** + * The type of the metrics persistence to use. Must be a registered {@link + * org.apache.polaris.core.persistence.metrics.MetricsPersistence} identifier. + * + *

        Defaults to {@code noop} which discards all metrics. + */ + @WithDefault("noop") + String type(); +} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java b/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java new file mode 100644 index 0000000000..ef4a1ef88b --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.persistence; + +import io.smallrye.common.annotation.Identifier; +import jakarta.annotation.Nonnull; +import jakarta.enterprise.context.ApplicationScoped; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.apache.polaris.core.persistence.metrics.MetricsQueryCriteria; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; + +/** + * A CDI-managed no-op implementation of {@link MetricsPersistence}. + * + *

        This bean is selected when {@code polaris.persistence.metrics.type} is set to {@code "noop"} + * (the default). All write operations are silently ignored, and all query operations return empty + * pages. + * + *

        This is useful when metrics persistence is not needed or when the persistence backend does not + * support metrics storage. + * + * @see MetricsPersistence#NOOP + */ +@ApplicationScoped +@Identifier("noop") +public class NoopMetricsPersistence implements MetricsPersistence { + + @Override + public void writeScanReport(@Nonnull ScanMetricsRecord record) { + MetricsPersistence.NOOP.writeScanReport(record); + } + + @Override + public void writeCommitReport(@Nonnull CommitMetricsRecord record) { + MetricsPersistence.NOOP.writeCommitReport(record); + } + + @Nonnull + @Override + public Page queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return MetricsPersistence.NOOP.queryScanReports(criteria, pageToken); + } + + @Nonnull + @Override + public Page queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return MetricsPersistence.NOOP.queryCommitReports(criteria, pageToken); + } +} From c23b162e53f48830173e12c2dedcf7a54edff3bf Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 17:23:42 -0800 Subject: [PATCH 66/67] feat: Implement separate MetricsSchemaBootstrap SPI handler This commit implements a decoupled bootstrap handler for the metrics schema, following dimas-b's suggestion in PR review comment r2775702215. 
Changes: - Create MetricsSchemaBootstrap SPI interface in polaris-core - Defines bootstrap(realmId) and isBootstrapped(realmId) methods - Includes NOOP constant for backends that don't support metrics - Annotated with @Beta since the API is experimental - Create JdbcMetricsSchemaBootstrap implementation in polaris-relational-jdbc - Executes schema-metrics-v1.sql for H2/PostgreSQL - Idempotent: checks metrics_version table before bootstrapping - Uses DatasourceOperations for database access - Create MetricsSchemaVersion model class for metrics_version table - Add generateMetricsVersionQuery() to QueryGenerator - Add CDI producers for MetricsSchemaBootstrap - JdbcMetricsPersistenceProducer: produces @Identifier("relational-jdbc") - ServiceProducers: produces @Identifier("noop") - Update JdbcMetaStoreManagerFactory to use injected MetricsSchemaBootstrap instead of inline metrics bootstrap logic - Add 'bootstrap-metrics' CLI command (BootstrapMetricsCommand) - Allows operators to bootstrap metrics schema independently - Supports multiple realms: -r realm1 -r realm2 - Idempotent: skips already-bootstrapped realms - Simplify NoopMetricsPersistence (per r2776432583) - Replace delegator class with simple producer method in ServiceProducers - Delete NoopMetricsPersistence.java This enables operators to add metrics persistence support to existing Polaris deployments without re-bootstrapping the entity schema. 
--- .../jdbc/JdbcMetaStoreManagerFactory.java | 11 +- .../jdbc/JdbcMetricsPersistenceProducer.java | 25 ++++ .../jdbc/JdbcMetricsSchemaBootstrap.java | 104 +++++++++++++++++ .../relational/jdbc/QueryGenerator.java | 7 ++ .../jdbc/models/MetricsSchemaVersion.java | 61 ++++++++++ .../metrics/MetricsSchemaBootstrap.java | 107 ++++++++++++++++++ .../admintool/BootstrapMetricsCommand.java | 95 ++++++++++++++++ .../polaris/admintool/PolarisAdminTool.java | 1 + .../service/config/ServiceProducers.java | 31 +++++ .../persistence/NoopMetricsPersistence.java | 70 ------------ 10 files changed, 438 insertions(+), 74 deletions(-) create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsSchemaBootstrap.java create mode 100644 persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsSchemaVersion.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsSchemaBootstrap.java create mode 100644 runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapMetricsCommand.java delete mode 100644 runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java index 24c1d025ca..28dbfb6eac 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java @@ -52,6 +52,7 @@ import org.apache.polaris.core.persistence.cache.InMemoryEntityCache; import org.apache.polaris.core.persistence.dao.entity.BaseResult; import 
org.apache.polaris.core.persistence.dao.entity.PrincipalSecretsResult; +import org.apache.polaris.core.persistence.metrics.MetricsSchemaBootstrap; import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +79,10 @@ public class JdbcMetaStoreManagerFactory implements MetaStoreManagerFactory { @Inject RelationalJdbcConfiguration relationalJdbcConfiguration; @Inject RealmConfig realmConfig; + @Inject + @Identifier("relational-jdbc") + MetricsSchemaBootstrap metricsSchemaBootstrap; + protected JdbcMetaStoreManagerFactory() {} protected PrincipalSecretsGenerator secretsGenerator( @@ -173,11 +178,9 @@ public synchronized Map bootstrapRealms( .getDatabaseType() .openInitScriptResource(effectiveSchemaVersion)); - // Run the metrics schema script if requested + // Run the metrics schema bootstrap if requested if (JdbcBootstrapUtils.shouldIncludeMetrics(bootstrapOptions)) { - LOGGER.info("Including metrics schema for realm: {}", realm); - datasourceOperations.executeScript( - datasourceOperations.getDatabaseType().openMetricsSchemaResource(1)); + metricsSchemaBootstrap.bootstrap(realm); } } catch (SQLException e) { throw new RuntimeException( diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java index 17bf816b02..d65a5d7de7 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsPersistenceProducer.java @@ -30,6 +30,7 @@ import org.apache.polaris.core.config.RealmConfig; import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import 
org.apache.polaris.core.persistence.metrics.MetricsSchemaBootstrap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,4 +96,28 @@ public MetricsPersistence metricsPersistence(RealmContext realmContext, RealmCon return MetricsPersistence.NOOP; } } + + /** + * Produces a {@link MetricsSchemaBootstrap} instance for the JDBC backend. + * + *

        This producer creates a {@link JdbcMetricsSchemaBootstrap} that can bootstrap the metrics + * schema tables independently from the entity schema. + * + * @return a MetricsSchemaBootstrap implementation for JDBC + */ + @Produces + @ApplicationScoped + @Identifier("relational-jdbc") + public MetricsSchemaBootstrap metricsSchemaBootstrap() { + try { + DatasourceOperations datasourceOperations = + new DatasourceOperations(dataSource.get(), relationalJdbcConfiguration); + return new JdbcMetricsSchemaBootstrap(datasourceOperations); + } catch (SQLException e) { + LOGGER.warn( + "Failed to create JdbcMetricsSchemaBootstrap due to {}. Returning NOOP implementation.", + e.getMessage()); + return MetricsSchemaBootstrap.NOOP; + } + } } diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsSchemaBootstrap.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsSchemaBootstrap.java new file mode 100644 index 0000000000..0802f1f5aa --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcMetricsSchemaBootstrap.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc; + +import java.sql.SQLException; +import java.util.List; +import org.apache.polaris.core.persistence.metrics.MetricsSchemaBootstrap; +import org.apache.polaris.persistence.relational.jdbc.models.MetricsSchemaVersion; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * JDBC implementation of {@link MetricsSchemaBootstrap}. + * + *

        This implementation creates the metrics schema tables (scan_metrics_report, + * commit_metrics_report, metrics_version) in the configured JDBC database. + * + *

        The metrics schema is separate from the entity schema and can be bootstrapped independently. + * This allows operators to add metrics support to existing Polaris deployments without + * re-bootstrapping the entity schema. + */ +public class JdbcMetricsSchemaBootstrap implements MetricsSchemaBootstrap { + + private static final Logger LOGGER = LoggerFactory.getLogger(JdbcMetricsSchemaBootstrap.class); + + /** Current metrics schema version. */ + private static final int METRICS_SCHEMA_VERSION = 1; + + private final DatasourceOperations datasourceOperations; + + public JdbcMetricsSchemaBootstrap(DatasourceOperations datasourceOperations) { + this.datasourceOperations = datasourceOperations; + } + + @Override + public void bootstrap(String realmId) { + if (isBootstrapped(realmId)) { + LOGGER.debug("Metrics schema already bootstrapped for realm: {}", realmId); + return; + } + + LOGGER.info("Bootstrapping metrics schema v{} for realm: {}", METRICS_SCHEMA_VERSION, realmId); + + try { + datasourceOperations.executeScript( + datasourceOperations.getDatabaseType().openMetricsSchemaResource(METRICS_SCHEMA_VERSION)); + LOGGER.info( + "Successfully bootstrapped metrics schema v{} for realm: {}", + METRICS_SCHEMA_VERSION, + realmId); + } catch (SQLException e) { + throw new RuntimeException( + String.format( + "Failed to bootstrap metrics schema for realm '%s': %s", realmId, e.getMessage()), + e); + } + } + + @Override + public boolean isBootstrapped(String realmId) { + return loadMetricsSchemaVersion() > 0; + } + + /** + * Loads the current metrics schema version from the database. 
+ * + * @return the metrics schema version, or 0 if not bootstrapped + */ + int loadMetricsSchemaVersion() { + QueryGenerator.PreparedQuery query = QueryGenerator.generateMetricsVersionQuery(); + try { + List versions = + datasourceOperations.executeSelect(query, new MetricsSchemaVersion()); + if (versions == null || versions.isEmpty()) { + return 0; + } + return versions.getFirst().getValue(); + } catch (SQLException e) { + if (datasourceOperations.isRelationDoesNotExist(e)) { + // Table doesn't exist yet - schema not bootstrapped + LOGGER.debug("Metrics schema version table not found: {}", e.getMessage()); + return 0; + } + LOGGER.error("Failed to load metrics schema version due to {}", e.getMessage(), e); + throw new IllegalStateException("Failed to retrieve metrics schema version", e); + } + } +} diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java index 423e965bd9..8f770b521b 100644 --- a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/QueryGenerator.java @@ -280,6 +280,13 @@ static PreparedQuery generateVersionQuery() { return new PreparedQuery("SELECT version_value FROM POLARIS_SCHEMA.VERSION", List.of()); } + @VisibleForTesting + static PreparedQuery generateMetricsVersionQuery() { + return new PreparedQuery( + "SELECT version_value FROM POLARIS_SCHEMA.metrics_version WHERE version_key = 'metrics_version'", + List.of()); + } + @VisibleForTesting static PreparedQuery generateEntityTableExistQuery() { return new PreparedQuery( diff --git a/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsSchemaVersion.java 
b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsSchemaVersion.java new file mode 100644 index 0000000000..b6b73426ab --- /dev/null +++ b/persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/models/MetricsSchemaVersion.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.relational.jdbc.models; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Map; +import org.apache.polaris.persistence.relational.jdbc.DatabaseType; + +/** + * Model class for the metrics schema version table. + * + *

        This is separate from {@link SchemaVersion} which tracks the entity schema version. The + * metrics schema can evolve independently from the entity schema. + */ +public class MetricsSchemaVersion implements Converter { + + private final Integer value; + + public MetricsSchemaVersion() { + this.value = null; + } + + private MetricsSchemaVersion(int value) { + this.value = value; + } + + public int getValue() { + if (value == null) { + throw new IllegalStateException( + "Metrics schema version should be constructed via fromResultSet"); + } + return value; + } + + @Override + public MetricsSchemaVersion fromResultSet(ResultSet rs) throws SQLException { + return new MetricsSchemaVersion(rs.getInt("version_value")); + } + + @Override + public Map toMap(DatabaseType databaseType) { + return Map.of("version_value", this.value); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsSchemaBootstrap.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsSchemaBootstrap.java new file mode 100644 index 0000000000..eca6ea282a --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsSchemaBootstrap.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import com.google.common.annotations.Beta; + +/** + * Service Provider Interface (SPI) for bootstrapping the metrics schema. + * + *

        This interface enables different persistence backends (JDBC, NoSQL, custom) to implement + * metrics schema initialization in a way appropriate for their storage model. The metrics schema is + * separate from the entity schema and can be bootstrapped independently. + * + *

        Implementations should be idempotent - calling {@link #bootstrap(String)} multiple times on + * the same realm should have no effect after the first successful call. + * + *

        Dependency Injection

        + * + *

        This interface is designed to be injected via CDI (Contexts and Dependency Injection). The + * deployment module should provide a {@code @Produces} method that creates the appropriate + * implementation based on the configured persistence backend. + * + *

        Usage

        + * + *

        The metrics schema can be bootstrapped: + * + *

          + *
        • During initial realm bootstrap with the {@code --include-metrics} flag + *
        • Independently via the {@code bootstrap-metrics} CLI command + *
        • Programmatically by injecting this interface and calling {@link #bootstrap(String)} + *
        + * + *

        Note: This SPI is currently experimental. The API may change in future releases. + * + * @see MetricsPersistence + */ +@Beta +public interface MetricsSchemaBootstrap { + + /** + * A no-op implementation for backends that don't support metrics schema bootstrap. + * + *

        This implementation always reports the schema as bootstrapped and does nothing when {@link + * #bootstrap(String)} is called. + */ + MetricsSchemaBootstrap NOOP = + new MetricsSchemaBootstrap() { + @Override + public void bootstrap(String realmId) { + // No-op: metrics schema bootstrap not supported + } + + @Override + public boolean isBootstrapped(String realmId) { + // Always report as bootstrapped to avoid errors + return true; + } + + @Override + public String toString() { + return "MetricsSchemaBootstrap.NOOP"; + } + }; + + /** + * Bootstraps the metrics schema for the specified realm. + * + *

        This operation is idempotent - calling it multiple times on the same realm should have no + * effect after the first successful call. + * + *

        Implementations should: + * + *

          + *
        • Create the necessary tables/collections for storing metrics data + *
        • Create any required indexes for efficient querying + *
        • Record the metrics schema version for future migrations + *
        + * + * @param realmId the realm identifier to bootstrap the metrics schema for + * @throws RuntimeException if the bootstrap operation fails + */ + void bootstrap(String realmId); + + /** + * Checks if the metrics schema has been bootstrapped for the specified realm. + * + * @param realmId the realm identifier to check + * @return {@code true} if the metrics schema is already bootstrapped, {@code false} otherwise + */ + boolean isBootstrapped(String realmId); +} diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapMetricsCommand.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapMetricsCommand.java new file mode 100644 index 0000000000..97019b63a0 --- /dev/null +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/BootstrapMetricsCommand.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.admintool; + +import io.smallrye.common.annotation.Identifier; +import jakarta.inject.Inject; +import java.util.List; +import org.apache.polaris.core.persistence.metrics.MetricsSchemaBootstrap; +import picocli.CommandLine; + +/** + * CLI command to bootstrap the metrics schema independently from the entity schema. 
+ * + *

        This command allows operators to add metrics persistence support to an existing Polaris + * deployment without re-bootstrapping the entity schema. It is idempotent - running it multiple + * times on the same realm has no effect after the first successful run. + * + *

        Example usage: + * + *

        {@code
        + * polaris-admin bootstrap-metrics -r my-realm
        + * polaris-admin bootstrap-metrics -r realm1 -r realm2
        + * }
        + */ +@CommandLine.Command( + name = "bootstrap-metrics", + mixinStandardHelpOptions = true, + description = "Bootstraps the metrics schema for existing realms.") +public class BootstrapMetricsCommand extends BaseCommand { + + @Inject + @Identifier("relational-jdbc") + MetricsSchemaBootstrap metricsSchemaBootstrap; + + @CommandLine.Option( + names = {"-r", "--realm"}, + paramLabel = "", + required = true, + description = "The name of a realm to bootstrap metrics for.") + List realms; + + @Override + public Integer call() { + boolean success = true; + + for (String realm : realms) { + try { + if (metricsSchemaBootstrap.isBootstrapped(realm)) { + spec.commandLine() + .getOut() + .printf("Metrics schema already bootstrapped for realm '%s'. Skipping.%n", realm); + } else { + spec.commandLine() + .getOut() + .printf("Bootstrapping metrics schema for realm '%s'...%n", realm); + metricsSchemaBootstrap.bootstrap(realm); + spec.commandLine() + .getOut() + .printf("Metrics schema successfully bootstrapped for realm '%s'.%n", realm); + } + } catch (Exception e) { + spec.commandLine() + .getErr() + .printf( + "Failed to bootstrap metrics schema for realm '%s': %s%n", realm, e.getMessage()); + e.printStackTrace(spec.commandLine().getErr()); + success = false; + } + } + + if (success) { + spec.commandLine().getOut().println("Metrics bootstrap completed successfully."); + return 0; + } else { + spec.commandLine().getErr().println("Metrics bootstrap encountered errors during operation."); + return EXIT_CODE_BOOTSTRAP_ERROR; + } + } +} diff --git a/runtime/admin/src/main/java/org/apache/polaris/admintool/PolarisAdminTool.java b/runtime/admin/src/main/java/org/apache/polaris/admintool/PolarisAdminTool.java index 66ddaf0547..aa02797f3b 100644 --- a/runtime/admin/src/main/java/org/apache/polaris/admintool/PolarisAdminTool.java +++ b/runtime/admin/src/main/java/org/apache/polaris/admintool/PolarisAdminTool.java @@ -30,6 +30,7 @@ subcommands = { HelpCommand.class, 
BootstrapCommand.class, + BootstrapMetricsCommand.class, PurgeCommand.class, }) public class PolarisAdminTool extends BaseMetaStoreCommand { diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 8f382da8c5..121250fb0f 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -50,6 +50,7 @@ import org.apache.polaris.core.persistence.bootstrap.RootCredentialsSet; import org.apache.polaris.core.persistence.cache.EntityCache; import org.apache.polaris.core.persistence.metrics.MetricsPersistence; +import org.apache.polaris.core.persistence.metrics.MetricsSchemaBootstrap; import org.apache.polaris.core.persistence.resolver.ResolutionManifestFactory; import org.apache.polaris.core.persistence.resolver.ResolutionManifestFactoryImpl; import org.apache.polaris.core.persistence.resolver.Resolver; @@ -226,6 +227,36 @@ public PolarisMetaStoreManager polarisMetaStoreManager( return metaStoreManagerFactory.getOrCreateMetaStoreManager(realmContext); } + /** + * Produces a no-op {@link MetricsPersistence} bean. + * + *

        <p>This bean is selected when {@code polaris.persistence.metrics.type} is set to {@code "noop"} + * (the default). All write operations are silently ignored, and all query operations return empty + * pages. + * + * @return the no-op MetricsPersistence singleton + */ + @Produces + @Identifier("noop") + public MetricsPersistence noopMetricsPersistence() { + return MetricsPersistence.NOOP; + } + + /** + * Produces a no-op {@link MetricsSchemaBootstrap} bean. + * + *

        <p>This bean is selected for backends that don't support metrics schema bootstrap. The {@link + * MetricsSchemaBootstrap#bootstrap(String)} method does nothing, and {@link + * MetricsSchemaBootstrap#isBootstrapped(String)} always returns {@code true}. + * + * @return the no-op MetricsSchemaBootstrap singleton + */ + @Produces + @Identifier("noop") + public MetricsSchemaBootstrap noopMetricsSchemaBootstrap() { + return MetricsSchemaBootstrap.NOOP; + } + + /** * Produces a {@link MetricsPersistence} bean for the current request. * diff --git a/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java b/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java deleted file mode 100644 index ef4a1ef88b..0000000000 --- a/runtime/service/src/main/java/org/apache/polaris/service/persistence/NoopMetricsPersistence.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.polaris.service.persistence; - -import io.smallrye.common.annotation.Identifier; -import jakarta.annotation.Nonnull; -import jakarta.enterprise.context.ApplicationScoped; -import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; -import org.apache.polaris.core.persistence.metrics.MetricsPersistence; -import org.apache.polaris.core.persistence.metrics.MetricsQueryCriteria; -import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; -import org.apache.polaris.core.persistence.pagination.Page; -import org.apache.polaris.core.persistence.pagination.PageToken; - -/** - * A CDI-managed no-op implementation of {@link MetricsPersistence}. - * - *

        <p>This bean is selected when {@code polaris.persistence.metrics.type} is set to {@code "noop"} - * (the default). All write operations are silently ignored, and all query operations return empty - * pages. - * - *

        <p>This is useful when metrics persistence is not needed or when the persistence backend does not - * support metrics storage. - * - * @see MetricsPersistence#NOOP - */ -@ApplicationScoped -@Identifier("noop") -public class NoopMetricsPersistence implements MetricsPersistence { - - @Override - public void writeScanReport(@Nonnull ScanMetricsRecord record) { - MetricsPersistence.NOOP.writeScanReport(record); - } - - @Override - public void writeCommitReport(@Nonnull CommitMetricsRecord record) { - MetricsPersistence.NOOP.writeCommitReport(record); - } - - @Nonnull - @Override - public Page<ScanMetricsRecord> queryScanReports( - @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { - return MetricsPersistence.NOOP.queryScanReports(criteria, pageToken); - } - - @Nonnull - @Override - public Page<CommitMetricsRecord> queryCommitReports( - @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { - return MetricsPersistence.NOOP.queryCommitReports(criteria, pageToken); - } -} From 4e1a2a6b843a22fb0c5571eebe5b0da9124bf88c Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 18:29:32 -0800 Subject: [PATCH 67/67] Add unit tests for BootstrapMetricsCommand --- .../BootstrapMetricsCommandTestBase.java | 63 +++++++++++ ...tionalJdbcBootstrapMetricsCommandTest.java | 101 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 runtime/admin/src/test/java/org/apache/polaris/admintool/BootstrapMetricsCommandTestBase.java create mode 100644 runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapMetricsCommandTest.java diff --git a/runtime/admin/src/test/java/org/apache/polaris/admintool/BootstrapMetricsCommandTestBase.java b/runtime/admin/src/test/java/org/apache/polaris/admintool/BootstrapMetricsCommandTestBase.java new file mode 100644 index 0000000000..7e52bf27c9 --- /dev/null +++ b/runtime/admin/src/test/java/org/apache/polaris/admintool/BootstrapMetricsCommandTestBase.java @@ -0,0 +1,63 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.admintool; + +import static org.apache.polaris.admintool.BaseCommand.EXIT_CODE_USAGE; +import static org.assertj.core.api.Assertions.assertThat; + +import io.quarkus.test.junit.main.Launch; +import io.quarkus.test.junit.main.LaunchResult; +import io.quarkus.test.junit.main.QuarkusMainTest; +import org.junit.jupiter.api.Test; + +/** + * Base test class for {@link BootstrapMetricsCommand}. + * + *

        <p>Subclasses should provide the appropriate test profile for their persistence backend. + */ +@QuarkusMainTest +public abstract class BootstrapMetricsCommandTestBase { + + @Test + @Launch(value = {"bootstrap-metrics", "--help"}) + public void testBootstrapMetricsHelp(LaunchResult result) { + assertThat(result.getOutput()) + .contains("bootstrap-metrics") + .contains("Bootstraps the metrics schema for existing realms") + .contains("-r, --realm"); + } + + @Test + @Launch( + value = {"bootstrap-metrics"}, + exitCode = EXIT_CODE_USAGE) + public void testBootstrapMetricsMissingRealm(LaunchResult result) { + assertThat(result.getErrorOutput()).contains("Missing required option: '--realm=<realm>'"); + } + + @Test + @Launch( + value = {"bootstrap-metrics", "-r", "realm1", "--not-real-arg"}, + exitCode = EXIT_CODE_USAGE) + public void testBootstrapMetricsInvalidArg(LaunchResult result) { + assertThat(result.getErrorOutput()) + .contains("Unknown option: '--not-real-arg'") + .contains("Usage:"); + } +} diff --git a/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapMetricsCommandTest.java b/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapMetricsCommandTest.java new file mode 100644 index 0000000000..5c5836dc13 --- /dev/null +++ b/runtime/admin/src/test/java/org/apache/polaris/admintool/relational/jdbc/RelationalJdbcBootstrapMetricsCommandTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.admintool.relational.jdbc; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.quarkus.test.junit.TestProfile; +import io.quarkus.test.junit.main.LaunchResult; +import io.quarkus.test.junit.main.QuarkusMainLauncher; +import org.apache.polaris.admintool.BootstrapMetricsCommandTestBase; +import org.junit.jupiter.api.Test; + +/** + * JDBC-specific tests for {@link org.apache.polaris.admintool.BootstrapMetricsCommand}. + * + *

        <p>These tests verify the bootstrap-metrics command works correctly with the JDBC persistence + * backend. + * + *

        <p>Note: Tests that require state persistence across multiple {@code launcher.launch()} + * calls are not possible with the current test framework because each launch gets a fresh + * PostgreSQL database. See the TODO comment in {@link + * RelationalJdbcBootstrapCommandTest#testBootstrapFailsWhenAddingRealmWithDifferentSchemaVersion} + * for details. + */ +@TestProfile(RelationalJdbcAdminProfile.class) +public class RelationalJdbcBootstrapMetricsCommandTest extends BootstrapMetricsCommandTestBase { + + @Test + public void testBootstrapMetricsForSingleRealm(QuarkusMainLauncher launcher) { + // Bootstrap entity schema and metrics schema in one launch using --include-metrics. + // Note: Each launcher.launch() gets a fresh database, so we use --include-metrics + // to bootstrap both entity and metrics schema in a single launch. + LaunchResult bootstrapResult = + launcher.launch( + "bootstrap", + "-r", + "metrics-realm1", + "-c", + "metrics-realm1,root,s3cr3t", + "--include-metrics"); + assertThat(bootstrapResult.exitCode()).isEqualTo(0); + assertThat(bootstrapResult.getOutput()) + .contains("Realm 'metrics-realm1' successfully bootstrapped."); + } + + @Test + public void testBootstrapMetricsMultipleRealmsInSingleLaunch(QuarkusMainLauncher launcher) { + // Bootstrap entity schema and metrics schema for multiple realms in one launch + LaunchResult result = + launcher.launch( + "bootstrap", + "-r", + "metrics-realm3", + "-r", + "metrics-realm4", + "-c", + "metrics-realm3,root,s3cr3t", + "-c", + "metrics-realm4,root,s3cr3t", + "--include-metrics"); + assertThat(result.exitCode()).isEqualTo(0); + assertThat(result.getOutput()) + .contains("Realm 'metrics-realm3' successfully bootstrapped.") + .contains("Realm 'metrics-realm4' successfully bootstrapped.") + .contains("Bootstrap completed successfully."); + } + + // TODO: Enable these tests once we enable postgres container reuse across launches. 
+ // See + // RelationalJdbcBootstrapCommandTest#testBootstrapFailsWhenAddingRealmWithDifferentSchemaVersion + // + // @Test + // public void testBootstrapMetricsIdempotent(QuarkusMainLauncher launcher) { + // // First launch: bootstrap entity schema and metrics schema + // LaunchResult result1 = launcher.launch( + // "bootstrap", "-r", "realm1", "-c", "realm1,root,s3cr3t", "--include-metrics"); + // assertThat(result1.exitCode()).isEqualTo(0); + // + // // Second launch: bootstrap-metrics should detect it's already bootstrapped + // LaunchResult result2 = launcher.launch("bootstrap-metrics", "-r", "realm1"); + // assertThat(result2.exitCode()).isEqualTo(0); + // assertThat(result2.getOutput()) + // .contains("Metrics schema already bootstrapped for realm 'realm1'. Skipping."); + // } +}