From af07d424664c859c8aebf735f036ca4ffefb7ef4 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 29 Jan 2026 11:29:23 -0800 Subject: [PATCH 01/12] feat(metrics): Add MetricsPersistence SPI for backend-agnostic metrics storage This commit introduces a Service Provider Interface (SPI) for persisting Iceberg metrics reports, addressing the extensibility concerns raised in the design review. Key components: - MetricsPersistence: Main SPI interface with write and query operations - NoOpMetricsPersistence: Default do-nothing implementation for backends that don't support metrics persistence - ScanMetricsRecord: Immutable interface for scan metrics data - CommitMetricsRecord: Immutable interface for commit metrics data - MetricsQueryCriteria: Query parameters with filtering and pagination - MetricsContext: Context for conversion (realm, catalog, principal info) - MetricsPersistenceFactory: Factory for realm-scoped instances - MetricsRecordConverter: Converts Iceberg reports to SPI records Design principles: - Backend-agnostic: Can be implemented by JDBC, NoSQL, or custom backends - No instanceof checks: Service code calls interface methods directly - Idempotent writes: Same reportId written twice has no effect - Graceful degradation: Unsupported backends return empty results Relates to: #3337 --- .../metrics/CommitMetricsRecord.java | 165 +++++++++++++++ .../persistence/metrics/MetricsContext.java | 65 ++++++ .../metrics/MetricsPersistence.java | 103 ++++++++++ .../metrics/MetricsPersistenceFactory.java | 41 ++++ .../metrics/MetricsQueryCriteria.java | 139 +++++++++++++ .../metrics/MetricsRecordConverter.java | 188 ++++++++++++++++++ .../metrics/NoOpMetricsPersistence.java | 63 ++++++ .../metrics/ScanMetricsRecord.java | 164 +++++++++++++++ 8 files changed, 928 insertions(+) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java new file mode 100644 index 0000000000..25a1044a82 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Backend-agnostic representation of an Iceberg commit metrics report. + * + *

This record captures all relevant metrics from an Iceberg {@code CommitReport} along with + * contextual information such as realm, catalog, and request correlation data. + */ +@PolarisImmutable +public interface CommitMetricsRecord { + + // === Identification === + + /** Unique identifier for this report (UUID). */ + String reportId(); + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Table name. */ + String tableName(); + + // === Timing === + + /** Timestamp when the report was received. */ + Instant timestamp(); + + // === Request Context === + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** Trace ID from the report itself (may differ from OTel trace). */ + Optional reportTraceId(); + + // === Commit Context === + + /** Snapshot ID created by this commit. */ + long snapshotId(); + + /** Sequence number of the snapshot. */ + Optional sequenceNumber(); + + /** Operation type (e.g., "append", "overwrite", "delete"). */ + String operation(); + + // === File Metrics - Data Files === + + /** Number of data files added. */ + long addedDataFiles(); + + /** Number of data files removed. */ + long removedDataFiles(); + + /** Total number of data files after commit. */ + long totalDataFiles(); + + // === File Metrics - Delete Files === + + /** Number of delete files added. */ + long addedDeleteFiles(); + + /** Number of delete files removed. */ + long removedDeleteFiles(); + + /** Total number of delete files after commit. */ + long totalDeleteFiles(); + + /** Number of equality delete files added. */ + long addedEqualityDeleteFiles(); + + /** Number of equality delete files removed. */ + long removedEqualityDeleteFiles(); + + /** Number of positional delete files added. */ + long addedPositionalDeleteFiles(); + + /** Number of positional delete files removed. */ + long removedPositionalDeleteFiles(); + + // === Record Metrics === + + /** Number of records added. */ + long addedRecords(); + + /** Number of records removed. */ + long removedRecords(); + + /** Total number of records after commit. */ + long totalRecords(); + + // === Size Metrics === + + /** Size of added files in bytes. */ + long addedFileSizeBytes(); + + /** Size of removed files in bytes. */ + long removedFileSizeBytes(); + + /** Total file size in bytes after commit. */ + long totalFileSizeBytes(); + + // === Timing === + + /** Total duration of the commit in milliseconds. */ + Optional totalDurationMs(); + + /** Number of commit attempts. */ + int attempts(); + + // === Extensibility === + + /** Additional metadata as key-value pairs. */ + Map metadata(); + + /** + * Creates a new builder for CommitMetricsRecord. + * + * @return a new builder instance + */ + static ImmutableCommitMetricsRecord.Builder builder() { + return ImmutableCommitMetricsRecord.builder(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java new file mode 100644 index 0000000000..d63800f82f --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Context information needed when converting Iceberg metrics reports to persistence records. + * + *

This context captures information from the request environment that is not available in the + * Iceberg report itself, such as realm, catalog, principal, and tracing information. + */ +@PolarisImmutable +public interface MetricsContext { + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** + * Creates a new builder for MetricsContext. + * + * @return a new builder instance + */ + static ImmutableMetricsContext.Builder builder() { + return ImmutableMetricsContext.builder(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java new file mode 100644 index 0000000000..f13058933a --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import jakarta.annotation.Nonnull; +import java.util.List; + +/** + * Service Provider Interface (SPI) for persisting Iceberg metrics reports. + * + *

This interface enables different persistence backends (JDBC, NoSQL, custom) to implement + * metrics storage in a way appropriate for their storage model, while allowing service code to + * remain backend-agnostic. + * + *

Implementations should be idempotent - writing the same reportId twice should have no effect. + * Implementations that don't support metrics persistence should return {@link #NOOP}. + */ +public interface MetricsPersistence { + + /** A no-op implementation for backends that don't support metrics persistence. */ + MetricsPersistence NOOP = new NoOpMetricsPersistence(); + + // ============================================================================ + // Capability Detection + // ============================================================================ + + /** + * Returns whether this persistence backend supports metrics storage. + * + *

Backends that do not support metrics should return false. Service code should NOT use this + * to branch with instanceof checks - instead, call the interface methods directly and rely on the + * no-op behavior for unsupported backends. + * + * @return true if metrics persistence is supported, false otherwise + */ + boolean isSupported(); + + // ============================================================================ + // Write Operations + // ============================================================================ + + /** + * Persists a scan metrics record. + * + *

This operation is idempotent - writing the same reportId twice has no effect. If {@link + * #isSupported()} returns false, this is a no-op. + * + * @param record the scan metrics record to persist + */ + void writeScanReport(@Nonnull ScanMetricsRecord record); + + /** + * Persists a commit metrics record. + * + *

This operation is idempotent - writing the same reportId twice has no effect. If {@link + * #isSupported()} returns false, this is a no-op. + * + * @param record the commit metrics record to persist + */ + void writeCommitReport(@Nonnull CommitMetricsRecord record); + + // ============================================================================ + // Query Operations + // ============================================================================ + + /** + * Queries scan metrics reports based on the specified criteria. + * + *

Returns an empty list if {@link #isSupported()} returns false. + * + * @param criteria the query criteria + * @return list of matching scan metrics records, or empty list if not supported + */ + @Nonnull + List queryScanReports(@Nonnull MetricsQueryCriteria criteria); + + /** + * Queries commit metrics reports based on the specified criteria. + * + *

Returns an empty list if {@link #isSupported()} returns false. + * + * @param criteria the query criteria + * @return list of matching commit metrics records, or empty list if not supported + */ + @Nonnull + List queryCommitReports(@Nonnull MetricsQueryCriteria criteria); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java new file mode 100644 index 0000000000..90a56e1ec3 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import org.apache.polaris.core.context.RealmContext; + +/** + * Factory interface for creating {@link MetricsPersistence} instances. + * + *

Implementations may cache instances per realm for efficiency. For backends that don't support + * metrics persistence, implementations should return {@link MetricsPersistence#NOOP}. + */ +public interface MetricsPersistenceFactory { + + /** + * Gets or creates a {@link MetricsPersistence} instance for the given realm. + * + *

Implementations may cache instances per realm. If the persistence backend does not support + * metrics persistence, this method should return {@link MetricsPersistence#NOOP}. + * + * @param realmContext the realm context + * @return a MetricsPersistence instance for the realm, or NOOP if not supported + */ + MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java new file mode 100644 index 0000000000..0336351079 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; + +/** + * Query criteria for retrieving metrics reports. + * + *

This class defines the parameters that can be used to filter and paginate metrics query + * results. Not all backends may support all query patterns - check the implementation documentation + * for supported query patterns and required indexes. + * + *

Supported Query Patterns

+ * + * + * + * + * + * + * + *
PatternFields UsedIndex Required
By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
By Trace IDotelTraceIdYes (OSS)
By PrincipalprincipalNameNo (custom deployment)
By Time OnlystartTime, endTimePartial (timestamp index)
+ */ +@PolarisImmutable +public interface MetricsQueryCriteria { + + // === Table Identification (optional) === + + /** Catalog name to filter by. */ + Optional catalogName(); + + /** Namespace to filter by (dot-separated). */ + Optional namespace(); + + /** Table name to filter by. */ + Optional tableName(); + + // === Time Range === + + /** Start time for the query (inclusive). */ + Optional startTime(); + + /** End time for the query (exclusive). */ + Optional endTime(); + + // === Correlation === + + /** OpenTelemetry trace ID to filter by. */ + Optional otelTraceId(); + + /** + * Principal name to filter by. + * + *

Note: This query pattern may require a custom index in deployment environments. The OSS + * codebase does not include an index for principal-based queries. + */ + Optional principalName(); + + // === Pagination === + + /** Maximum number of results to return. Defaults to 100. */ + @Value.Default + default int limit() { + return 100; + } + + /** Number of results to skip. Defaults to 0. */ + @Value.Default + default int offset() { + return 0; + } + + /** + * Creates a new builder for MetricsQueryCriteria. + * + * @return a new builder instance + */ + static ImmutableMetricsQueryCriteria.Builder builder() { + return ImmutableMetricsQueryCriteria.builder(); + } + + /** + * Creates criteria for querying by table and time range. + * + * @param catalogName the catalog name + * @param namespace the namespace (dot-separated) + * @param tableName the table name + * @param startTime the start time (inclusive) + * @param endTime the end time (exclusive) + * @param limit maximum number of results + * @return the query criteria + */ + static MetricsQueryCriteria forTable( + String catalogName, + String namespace, + String tableName, + Instant startTime, + Instant endTime, + int limit) { + return builder() + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .startTime(startTime) + .endTime(endTime) + .limit(limit) + .build(); + } + + /** + * Creates criteria for querying by OpenTelemetry trace ID. + * + * @param traceId the trace ID to search for + * @param limit maximum number of results + * @return the query criteria + */ + static MetricsQueryCriteria forTraceId(String traceId, int limit) { + return builder().otelTraceId(traceId).limit(limit).build(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java new file mode 100644 index 0000000000..f790e09658 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Collections; +import java.util.Optional; +import java.util.UUID; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.CounterResult; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.metrics.TimerResult; + +/** + * Utility class for converting Iceberg metrics reports to SPI record types. + * + *

This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link + * CommitReport} and combines them with context information to create persistence-ready records. + */ +public final class MetricsRecordConverter { + + private MetricsRecordConverter() { + // Utility class + } + + /** + * Converts an Iceberg ScanReport to a ScanMetricsRecord. + * + * @param scanReport the Iceberg scan report + * @param tableName the table name + * @param context the metrics context containing realm, catalog, and request information + * @return the scan metrics record ready for persistence + */ + public static ScanMetricsRecord fromScanReport( + ScanReport scanReport, String tableName, MetricsContext context) { + ScanMetricsResult metrics = scanReport.scanMetrics(); + + return ScanMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .realmId(context.realmId()) + .catalogId(context.catalogId()) + .catalogName(context.catalogName()) + .namespace(context.namespace()) + .tableName(tableName) + .timestamp(Instant.now()) + .principalName(context.principalName()) + .requestId(context.requestId()) + .otelTraceId(context.otelTraceId()) + .otelSpanId(context.otelSpanId()) + .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) + .snapshotId(Optional.of(scanReport.snapshotId())) + .schemaId(Optional.of(scanReport.schemaId())) + .filterExpression( + scanReport.filter() != null + ? Optional.of(scanReport.filter().toString()) + : Optional.empty()) + .projectedFieldIds( + scanReport.projectedFieldIds() != null + ? scanReport.projectedFieldIds() + : Collections.emptyList()) + .projectedFieldNames( + scanReport.projectedFieldNames() != null + ? scanReport.projectedFieldNames() + : Collections.emptyList()) + .resultDataFiles(getCounterValue(metrics.resultDataFiles())) + .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) + .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) + .totalDataManifests(getCounterValue(metrics.totalDataManifests())) + .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) + .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) + .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) + .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) + .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) + .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) + .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) + .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) + .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) + .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) + .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) + .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) + .metadata(Collections.emptyMap()) + .build(); + } + + /** + * Converts an Iceberg CommitReport to a CommitMetricsRecord. + * + * @param commitReport the Iceberg commit report + * @param tableName the table name + * @param context the metrics context containing realm, catalog, and request information + * @return the commit metrics record ready for persistence + */ + public static CommitMetricsRecord fromCommitReport( + CommitReport commitReport, String tableName, MetricsContext context) { + CommitMetricsResult metrics = commitReport.commitMetrics(); + + return CommitMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .realmId(context.realmId()) + .catalogId(context.catalogId()) + .catalogName(context.catalogName()) + .namespace(context.namespace()) + .tableName(tableName) + .timestamp(Instant.now()) + .principalName(context.principalName()) + .requestId(context.requestId()) + .otelTraceId(context.otelTraceId()) + .otelSpanId(context.otelSpanId()) + .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) + .snapshotId(commitReport.snapshotId()) + .sequenceNumber(Optional.of(commitReport.sequenceNumber())) + .operation(commitReport.operation()) + .addedDataFiles(getCounterValue(metrics.addedDataFiles())) + .removedDataFiles(getCounterValue(metrics.removedDataFiles())) + .totalDataFiles(getCounterValue(metrics.totalDataFiles())) + .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) + .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) + .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) + .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) + .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) + .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) + .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) + .addedRecords(getCounterValue(metrics.addedRecords())) + .removedRecords(getCounterValue(metrics.removedRecords())) + .totalRecords(getCounterValue(metrics.totalRecords())) + .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) + .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) + .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) + .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) + .attempts(getCounterValueInt(metrics.attempts())) + .metadata(Collections.emptyMap()) + .build(); + } + + private static long getCounterValue(CounterResult counter) { + if (counter == null) { + return 0L; + } + return counter.value(); + } + + private static int getCounterValueInt(CounterResult counter) { + if (counter == null) { + return 0; + } + return (int) counter.value(); + } + + private static long getTimerValueMs(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return 0L; + } + return timer.totalDuration().toMillis(); + } + + private static Optional getTimerValueMsOpt(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return Optional.empty(); + } + return Optional.of(timer.totalDuration().toMillis()); + } + + private static Optional getMetadataValue( + java.util.Map metadata, String key) { + if (metadata == null) { + return Optional.empty(); + } + return Optional.ofNullable(metadata.get(key)); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java new file mode 100644 index 0000000000..cd41e82573 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import jakarta.annotation.Nonnull; +import java.util.Collections; +import java.util.List; + +/** + * A no-op implementation of {@link MetricsPersistence} for backends that don't support metrics + * persistence. + * + *

This implementation is used as the default when a persistence backend does not support metrics + * storage. All write operations are silently ignored, and all query operations return empty + * results. + */ +final class NoOpMetricsPersistence implements MetricsPersistence { + + NoOpMetricsPersistence() {} + + @Override + public boolean isSupported() { + return false; + } + + @Override + public void writeScanReport(@Nonnull ScanMetricsRecord record) { + // No-op + } + + @Override + public void writeCommitReport(@Nonnull CommitMetricsRecord record) { + // No-op + } + + @Nonnull + @Override + public List queryScanReports(@Nonnull MetricsQueryCriteria criteria) { + return Collections.emptyList(); + } + + @Nonnull + @Override + public List queryCommitReports(@Nonnull MetricsQueryCriteria criteria) { + return Collections.emptyList(); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java new file mode 100644 index 0000000000..f268e3bc16 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Backend-agnostic representation of an Iceberg scan metrics report. + * + *

This record captures all relevant metrics from an Iceberg {@code ScanReport} along with + * contextual information such as realm, catalog, and request correlation data. + */ +@PolarisImmutable +public interface ScanMetricsRecord { + + // === Identification === + + /** Unique identifier for this report (UUID). */ + String reportId(); + + /** Multi-tenancy realm identifier. */ + String realmId(); + + /** Internal catalog ID. */ + String catalogId(); + + /** Human-readable catalog name. */ + String catalogName(); + + /** Dot-separated namespace path (e.g., "db.schema"). */ + String namespace(); + + /** Table name. */ + String tableName(); + + // === Timing === + + /** Timestamp when the report was received. */ + Instant timestamp(); + + // === Request Context === + + /** Name of the principal who initiated the operation. */ + Optional principalName(); + + /** Request ID for correlation. */ + Optional requestId(); + + /** OpenTelemetry trace ID for distributed tracing. */ + Optional otelTraceId(); + + /** OpenTelemetry span ID for distributed tracing. */ + Optional otelSpanId(); + + /** Trace ID from the report itself (may differ from OTel trace). */ + Optional reportTraceId(); + + // === Scan Context === + + /** Snapshot ID that was scanned. */ + Optional snapshotId(); + + /** Schema ID used for the scan. */ + Optional schemaId(); + + /** Filter expression applied to the scan (as string). */ + Optional filterExpression(); + + /** List of projected field IDs. */ + List projectedFieldIds(); + + /** List of projected field names. */ + List projectedFieldNames(); + + // === Scan Metrics - File Counts === + + /** Number of data files in the result. */ + long resultDataFiles(); + + /** Number of delete files in the result. */ + long resultDeleteFiles(); + + /** Total size of files in bytes. */ + long totalFileSizeBytes(); + + // === Scan Metrics - Manifest Counts === + + /** Total number of data manifests. */ + long totalDataManifests(); + + /** Total number of delete manifests. */ + long totalDeleteManifests(); + + /** Number of data manifests that were scanned. */ + long scannedDataManifests(); + + /** Number of delete manifests that were scanned. */ + long scannedDeleteManifests(); + + /** Number of data manifests that were skipped. */ + long skippedDataManifests(); + + /** Number of delete manifests that were skipped. */ + long skippedDeleteManifests(); + + /** Number of data files that were skipped. */ + long skippedDataFiles(); + + /** Number of delete files that were skipped. */ + long skippedDeleteFiles(); + + // === Scan Metrics - Timing === + + /** Total planning duration in milliseconds. */ + long totalPlanningDurationMs(); + + // === Scan Metrics - Delete Files === + + /** Number of equality delete files. */ + long equalityDeleteFiles(); + + /** Number of positional delete files. */ + long positionalDeleteFiles(); + + /** Number of indexed delete files. */ + long indexedDeleteFiles(); + + /** Total size of delete files in bytes. */ + long totalDeleteFileSizeBytes(); + + // === Extensibility === + + /** Additional metadata as key-value pairs. */ + Map metadata(); + + /** + * Creates a new builder for ScanMetricsRecord. + * + * @return a new builder instance + */ + static ImmutableScanMetricsRecord.Builder builder() { + return ImmutableScanMetricsRecord.builder(); + } +} From bceb1360d1b052715523198e038a373f83cc0f6e Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 29 Jan 2026 13:05:30 -0800 Subject: [PATCH 02/12] refactor(metrics): Remove ambient context fields from SPI records Remove fields that can be obtained from ambient request context at write time: - principalName: Available from SecurityContext/PolarisPrincipal - requestId: Not well-defined in Polaris; unclear what request it refers to - otelTraceId/otelSpanId: Available from OTel context via Span.current() Keep reportTraceId as it's a client-provided value from the report metadata that cannot be obtained from the ambient context. Rename otelTraceId filter in MetricsQueryCriteria to reportTraceId to match the field that is actually stored in the records. This keeps the SPI focused on business data (the metrics themselves) rather than infrastructure concerns (tracing, authentication) which the persistence implementation can obtain from the ambient context at write time if needed. --- .../metrics/CommitMetricsRecord.java | 26 +++++------ .../persistence/metrics/MetricsContext.java | 19 ++------ .../metrics/MetricsQueryCriteria.java | 46 +++++++++++++------ .../metrics/MetricsRecordConverter.java | 8 ---- .../metrics/ScanMetricsRecord.java | 26 +++++------ 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 25a1044a82..9d41db8173 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -57,21 +57,19 @@ public interface CommitMetricsRecord { /** Timestamp when the report was received. */ Instant timestamp(); - // === Request Context === + // === Client Correlation === - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - - /** Trace ID from the report itself (may differ from OTel trace). */ + /** + * Client-provided trace ID from the metrics report metadata. + * + *

This is an optional identifier that the Iceberg client may include in the report's metadata + * map (typically under the key "trace-id"). It allows clients to correlate this metrics report + * with their own distributed tracing system or query execution context. + * + *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal + * information are not included in this record. The persistence implementation can obtain these + * from the ambient request context (OTel context, security context) at write time if needed. + */ Optional reportTraceId(); // === Commit Context === diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java index d63800f82f..38432c12c2 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java @@ -18,14 +18,17 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; /** * Context information needed when converting Iceberg metrics reports to persistence records. * *

This context captures information from the request environment that is not available in the - * Iceberg report itself, such as realm, catalog, principal, and tracing information. + * Iceberg report itself, such as realm and catalog identification. + * + *

Note: Principal and tracing information (e.g., OpenTelemetry trace/span IDs) are not included + * in this context. The persistence implementation can obtain these from the ambient request context + * (OTel context, security context) at write time if needed. */ @PolarisImmutable public interface MetricsContext { @@ -42,18 +45,6 @@ public interface MetricsContext { /** Dot-separated namespace path (e.g., "db.schema"). */ String namespace(); - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - /** * Creates a new builder for MetricsContext. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 0336351079..aa844b1424 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -35,8 +35,7 @@ * * * - * - * + * * *
PatternFields UsedIndex Required
By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
By Trace IDotelTraceIdYes (OSS)
By PrincipalprincipalNameNo (custom deployment)
By Client Trace IDreportTraceIdNo (custom deployment)
By Time OnlystartTime, endTimePartial (timestamp index)
*/ @@ -64,26 +63,45 @@ public interface MetricsQueryCriteria { // === Correlation === - /** OpenTelemetry trace ID to filter by. */ - Optional otelTraceId(); - /** - * Principal name to filter by. + * Client-provided trace ID to filter by (from report metadata). + * + *

This matches the {@code reportTraceId} field in the metrics records, which originates from + * the client's metadata map. Useful for correlating metrics with client-side query execution. * *

Note: This query pattern may require a custom index in deployment environments. The OSS - * codebase does not include an index for principal-based queries. + * codebase does not include an index for trace-based queries. */ - Optional principalName(); + Optional reportTraceId(); // === Pagination === - /** Maximum number of results to return. Defaults to 100. */ + /** + * Maximum number of results to return. + * + *

Defaults to 100. Used together with {@link #offset()} for offset-based pagination. + */ @Value.Default default int limit() { return 100; } - /** Number of results to skip. Defaults to 0. */ + /** + * Number of results to skip before returning results. + * + *

Defaults to 0. Used for offset-based pagination where: + * + *

    + *
  • Page 1: offset=0, limit=100 → returns results 1-100 + *
  • Page 2: offset=100, limit=100 → returns results 101-200 + *
  • Page N: offset=(N-1)*limit, limit=100 → returns results for page N + *
+ * + *

Note: Offset-based pagination can be inefficient for large offsets in some databases. For + * very large result sets (>10K records), consider using time-based filtering with {@link + * #startTime()} and {@link #endTime()} to narrow the result set instead of relying on large + * offsets. + */ @Value.Default default int offset() { return 0; @@ -127,13 +145,13 @@ static MetricsQueryCriteria forTable( } /** - * Creates criteria for querying by OpenTelemetry trace ID. + * Creates criteria for querying by client-provided trace ID. * - * @param traceId the trace ID to search for + * @param reportTraceId the client trace ID to search for * @param limit maximum number of results * @return the query criteria */ - static MetricsQueryCriteria forTraceId(String traceId, int limit) { - return builder().otelTraceId(traceId).limit(limit).build(); + static MetricsQueryCriteria forReportTraceId(String reportTraceId, int limit) { + return builder().reportTraceId(reportTraceId).limit(limit).build(); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java index f790e09658..0cb740b8cf 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java @@ -61,10 +61,6 @@ public static ScanMetricsRecord fromScanReport( .namespace(context.namespace()) .tableName(tableName) .timestamp(Instant.now()) - .principalName(context.principalName()) - .requestId(context.requestId()) - .otelTraceId(context.otelTraceId()) - .otelSpanId(context.otelSpanId()) .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -120,10 +116,6 @@ public static CommitMetricsRecord fromCommitReport( .namespace(context.namespace()) .tableName(tableName) .timestamp(Instant.now()) - .principalName(context.principalName()) - .requestId(context.requestId()) - .otelTraceId(context.otelTraceId()) - .otelSpanId(context.otelSpanId()) .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index f268e3bc16..5e85566c14 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -58,21 +58,19 @@ public interface ScanMetricsRecord { /** Timestamp when the report was received. */ Instant timestamp(); - // === Request Context === + // === Client Correlation === - /** Name of the principal who initiated the operation. */ - Optional principalName(); - - /** Request ID for correlation. */ - Optional requestId(); - - /** OpenTelemetry trace ID for distributed tracing. */ - Optional otelTraceId(); - - /** OpenTelemetry span ID for distributed tracing. */ - Optional otelSpanId(); - - /** Trace ID from the report itself (may differ from OTel trace). */ + /** + * Client-provided trace ID from the metrics report metadata. + * + *

This is an optional identifier that the Iceberg client may include in the report's metadata + * map (typically under the key "trace-id"). It allows clients to correlate this metrics report + * with their own distributed tracing system or query execution context. + * + *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal + * information are not included in this record. The persistence implementation can obtain these + * from the ambient request context (OTel context, security context) at write time if needed. + */ Optional reportTraceId(); // === Scan Context === From 01d8e9383736d65055aa0d2174c3ce62a7e10aa1 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Sat, 31 Jan 2026 05:50:35 -0800 Subject: [PATCH 03/12] refactor(metrics): Replace offset-based pagination with PageToken pattern - Create ReportIdToken for cursor-based pagination using report ID (UUID) - Remove limit() and offset() from MetricsQueryCriteria - Update MetricsPersistence to use PageToken parameter and return Page - Update NoOpMetricsPersistence to return empty Page objects - Register ReportIdToken via service loader This change makes the SPI truly backend-agnostic by using the existing Polaris PageToken pattern instead of RDBMS-specific offset pagination. Each backend can implement cursors in their optimal way (keyset for RDBMS, continuation tokens for NoSQL). Addresses reviewer feedback on MetricsQueryCriteria.offset() field. --- .../metrics/MetricsPersistence.java | 57 ++++++-- .../metrics/MetricsQueryCriteria.java | 90 ++++++------- .../metrics/NoOpMetricsPersistence.java | 16 ++- .../persistence/metrics/ReportIdToken.java | 122 ++++++++++++++++++ ...ore.persistence.pagination.Token$TokenType | 1 + 5 files changed, 223 insertions(+), 63 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index f13058933a..346cc77d01 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -19,7 +19,8 @@ package org.apache.polaris.core.persistence.metrics; import jakarta.annotation.Nonnull; -import java.util.List; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; /** * Service Provider Interface (SPI) for persisting Iceberg metrics reports. @@ -30,6 +31,24 @@ * *

Implementations should be idempotent - writing the same reportId twice should have no effect. * Implementations that don't support metrics persistence should return {@link #NOOP}. + * + *

Pagination

+ * + *

Query methods use the standard Polaris pagination pattern with {@link PageToken} for requests + * and {@link Page} for responses. This enables: + * + *

    + *
  • Backend-specific cursor implementations (RDBMS offset, NoSQL continuation tokens, etc.) + *
  • Consistent pagination interface across all Polaris persistence APIs + *
  • Efficient cursor-based pagination that works with large result sets + *
+ * + *

The {@link ReportIdToken} provides a cursor based on the report ID (UUID), but backends may + * use other cursor strategies internally. + * + * @see PageToken + * @see Page + * @see ReportIdToken */ public interface MetricsPersistence { @@ -82,22 +101,42 @@ public interface MetricsPersistence { /** * Queries scan metrics reports based on the specified criteria. * - *

Returns an empty list if {@link #isSupported()} returns false. + *

Returns an empty page if {@link #isSupported()} returns false. + * + *

Example usage: + * + *

{@code
+   * // First page
+   * PageToken pageToken = PageToken.fromLimit(100);
+   * Page page = persistence.queryScanReports(criteria, pageToken);
+   *
+   * // Next page (if available)
+   * String nextPageToken = page.encodedResponseToken();
+   * if (nextPageToken != null) {
+   *   pageToken = PageToken.build(nextPageToken, null, () -> true);
+   *   Page nextPage = persistence.queryScanReports(criteria, pageToken);
+   * }
+   * }
* - * @param criteria the query criteria - * @return list of matching scan metrics records, or empty list if not supported + * @param criteria the query criteria (filters) + * @param pageToken pagination parameters (page size and optional cursor) + * @return page of matching scan metrics records with continuation token if more results exist */ @Nonnull - List queryScanReports(@Nonnull MetricsQueryCriteria criteria); + Page queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken); /** * Queries commit metrics reports based on the specified criteria. * - *

Returns an empty list if {@link #isSupported()} returns false. + *

Returns an empty page if {@link #isSupported()} returns false. * - * @param criteria the query criteria - * @return list of matching commit metrics records, or empty list if not supported + * @param criteria the query criteria (filters) + * @param pageToken pagination parameters (page size and optional cursor) + * @return page of matching commit metrics records with continuation token if more results exist + * @see #queryScanReports(MetricsQueryCriteria, PageToken) for pagination example */ @Nonnull - List queryCommitReports(@Nonnull MetricsQueryCriteria criteria); + Page queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken); } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index aa844b1424..d1989c68d2 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -21,14 +21,19 @@ import java.time.Instant; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; -import org.immutables.value.Value; /** * Query criteria for retrieving metrics reports. * - *

This class defines the parameters that can be used to filter and paginate metrics query - * results. Not all backends may support all query patterns - check the implementation documentation - * for supported query patterns and required indexes. + *

This class defines the filter parameters for metrics queries. Pagination is handled separately + * via {@link org.apache.polaris.core.persistence.pagination.PageToken}, which is passed as a + * separate parameter to query methods. This separation of concerns allows: + * + *

    + *
  • Different backends to implement pagination in their optimal way + *
  • Cursor-based pagination that works with both RDBMS and NoSQL backends + *
  • Reuse of the existing Polaris pagination infrastructure + *
* *

Supported Query Patterns

* @@ -38,6 +43,23 @@ * By Client Trace IDreportTraceIdNo (custom deployment) * By Time OnlystartTime, endTimePartial (timestamp index) * + * + *

Pagination

+ * + *

Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} + * passed to query methods. The token contains: + * + *

    + *
  • {@code pageSize()} - Maximum number of results to return + *
  • {@code value()} - Optional cursor token (e.g., {@link ReportIdToken}) for continuation + *
+ * + *

Query results are returned as {@link org.apache.polaris.core.persistence.pagination.Page} + * which includes an encoded token for fetching the next page. + * + * @see org.apache.polaris.core.persistence.pagination.PageToken + * @see org.apache.polaris.core.persistence.pagination.Page + * @see ReportIdToken */ @PolarisImmutable public interface MetricsQueryCriteria { @@ -74,38 +96,7 @@ public interface MetricsQueryCriteria { */ Optional reportTraceId(); - // === Pagination === - - /** - * Maximum number of results to return. - * - *

Defaults to 100. Used together with {@link #offset()} for offset-based pagination. - */ - @Value.Default - default int limit() { - return 100; - } - - /** - * Number of results to skip before returning results. - * - *

Defaults to 0. Used for offset-based pagination where: - * - *

    - *
  • Page 1: offset=0, limit=100 → returns results 1-100 - *
  • Page 2: offset=100, limit=100 → returns results 101-200 - *
  • Page N: offset=(N-1)*limit, limit=100 → returns results for page N - *
- * - *

Note: Offset-based pagination can be inefficient for large offsets in some databases. For - * very large result sets (>10K records), consider using time-based filtering with {@link - * #startTime()} and {@link #endTime()} to narrow the result set instead of relying on large - * offsets. - */ - @Value.Default - default int offset() { - return 0; - } + // === Factory Methods === /** * Creates a new builder for MetricsQueryCriteria. @@ -119,39 +110,44 @@ static ImmutableMetricsQueryCriteria.Builder builder() { /** * Creates criteria for querying by table and time range. * + *

Pagination is handled separately via the {@code PageToken} parameter to query methods. + * * @param catalogName the catalog name * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTime the start time (inclusive) * @param endTime the end time (exclusive) - * @param limit maximum number of results * @return the query criteria */ static MetricsQueryCriteria forTable( - String catalogName, - String namespace, - String tableName, - Instant startTime, - Instant endTime, - int limit) { + String catalogName, String namespace, String tableName, Instant startTime, Instant endTime) { return builder() .catalogName(catalogName) .namespace(namespace) .tableName(tableName) .startTime(startTime) .endTime(endTime) - .limit(limit) .build(); } /** * Creates criteria for querying by client-provided trace ID. * + *

Pagination is handled separately via the {@code PageToken} parameter to query methods. + * * @param reportTraceId the client trace ID to search for - * @param limit maximum number of results * @return the query criteria */ - static MetricsQueryCriteria forReportTraceId(String reportTraceId, int limit) { - return builder().reportTraceId(reportTraceId).limit(limit).build(); + static MetricsQueryCriteria forReportTraceId(String reportTraceId) { + return builder().reportTraceId(reportTraceId).build(); + } + + /** + * Creates empty criteria (no filters). Useful for pagination-only queries. + * + * @return empty query criteria + */ + static MetricsQueryCriteria empty() { + return builder().build(); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java index cd41e82573..56bd435ccf 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -20,15 +20,15 @@ import jakarta.annotation.Nonnull; import java.util.Collections; -import java.util.List; +import org.apache.polaris.core.persistence.pagination.Page; +import org.apache.polaris.core.persistence.pagination.PageToken; /** * A no-op implementation of {@link MetricsPersistence} for backends that don't support metrics * persistence. * *

This implementation is used as the default when a persistence backend does not support metrics - * storage. All write operations are silently ignored, and all query operations return empty - * results. + * storage. All write operations are silently ignored, and all query operations return empty pages. */ final class NoOpMetricsPersistence implements MetricsPersistence { @@ -51,13 +51,15 @@ public void writeCommitReport(@Nonnull CommitMetricsRecord record) { @Nonnull @Override - public List queryScanReports(@Nonnull MetricsQueryCriteria criteria) { - return Collections.emptyList(); + public Page queryScanReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return Page.fromItems(Collections.emptyList()); } @Nonnull @Override - public List queryCommitReports(@Nonnull MetricsQueryCriteria criteria) { - return Collections.emptyList(); + public Page queryCommitReports( + @Nonnull MetricsQueryCriteria criteria, @Nonnull PageToken pageToken) { + return Page.fromItems(Collections.emptyList()); } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java new file mode 100644 index 0000000000..fb15480a0c --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nullable; +import org.apache.polaris.core.persistence.pagination.Token; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Pagination {@linkplain Token token} for metrics queries, backed by the report ID (UUID). + * + *

This token enables cursor-based pagination for metrics queries across different storage + * backends. The report ID is used as the cursor because it is: + * + *

    + *
  • Guaranteed unique across all reports + *
  • Present in both scan and commit metrics records + *
  • Stable (doesn't change over time) + *
+ * + *

Each backend implementation can use this cursor value to implement efficient pagination in + * whatever way is optimal for that storage system: + * + *

    + *
  • RDBMS: {@code WHERE report_id > :lastReportId ORDER BY report_id} + *
  • NoSQL: Use report ID as partition/sort key cursor + *
  • Time-series: Combine with timestamp for efficient range scans + *
+ */ +@PolarisImmutable +@JsonSerialize(as = ImmutableReportIdToken.class) +@JsonDeserialize(as = ImmutableReportIdToken.class) +public interface ReportIdToken extends Token { + + /** Token type identifier. Short to minimize serialized token size. */ + String ID = "r"; + + /** + * The report ID to use as the cursor. + * + *

Results should start after this report ID. This is typically the {@code reportId} of the + * last item from the previous page. + */ + @JsonProperty("r") + String reportId(); + + @Override + default String getT() { + return ID; + } + + /** + * Creates a token from a report ID. + * + * @param reportId the report ID to use as cursor + * @return the token, or null if reportId is null + */ + static @Nullable ReportIdToken fromReportId(@Nullable String reportId) { + if (reportId == null) { + return null; + } + return ImmutableReportIdToken.builder().reportId(reportId).build(); + } + + /** + * Creates a token from a metrics record. + * + * @param record the record whose report ID should be used as cursor + * @return the token, or null if record is null + */ + static @Nullable ReportIdToken fromRecord(@Nullable ScanMetricsRecord record) { + if (record == null) { + return null; + } + return fromReportId(record.reportId()); + } + + /** + * Creates a token from a commit metrics record. + * + * @param record the record whose report ID should be used as cursor + * @return the token, or null if record is null + */ + static @Nullable ReportIdToken fromRecord(@Nullable CommitMetricsRecord record) { + if (record == null) { + return null; + } + return fromReportId(record.reportId()); + } + + /** Token type registration for service loader. */ + final class ReportIdTokenType implements TokenType { + @Override + public String id() { + return ID; + } + + @Override + public Class javaType() { + return ReportIdToken.class; + } + } +} diff --git a/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType b/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType index 3579dd29b3..d496ebeddf 100644 --- a/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType +++ b/polaris-core/src/main/resources/META-INF/services/org.apache.polaris.core.persistence.pagination.Token$TokenType @@ -18,3 +18,4 @@ # org.apache.polaris.core.persistence.pagination.EntityIdToken$EntityIdTokenType +org.apache.polaris.core.persistence.metrics.ReportIdToken$ReportIdTokenType From 59c907f270369fde124ac8b84899e35065066bf9 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Tue, 3 Feb 2026 18:28:22 -0800 Subject: [PATCH 04/12] Review comments --- .../iceberg/MetricsRecordConverter.java | 256 ++++++++++++++++++ .../metrics/CommitMetricsRecord.java | 56 +--- .../persistence/metrics/MetricsContext.java | 56 ---- .../metrics/MetricsPersistence.java | 57 ++-- .../metrics/MetricsPersistenceFactory.java | 41 --- .../metrics/MetricsQueryCriteria.java | 33 +-- .../metrics/MetricsRecordConverter.java | 180 ------------ .../metrics/MetricsRecordIdentity.java | 93 +++++++ .../metrics/NoOpMetricsPersistence.java | 5 - .../persistence/metrics/ReportIdToken.java | 9 + .../metrics/ScanMetricsRecord.java | 56 +--- 11 files changed, 416 insertions(+), 426 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java new file mode 100644 index 0000000000..e9d13f36d0 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.metrics.iceberg; + +import java.time.Instant; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.apache.iceberg.metrics.CommitMetricsResult; +import org.apache.iceberg.metrics.CommitReport; +import org.apache.iceberg.metrics.CounterResult; +import org.apache.iceberg.metrics.ScanMetricsResult; +import org.apache.iceberg.metrics.ScanReport; +import org.apache.iceberg.metrics.TimerResult; +import org.apache.polaris.core.persistence.metrics.CommitMetricsRecord; +import org.apache.polaris.core.persistence.metrics.ScanMetricsRecord; + +/** + * Converts Iceberg metrics reports to SPI record types using a fluent builder API. + * + *

This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link + * CommitReport} and combines them with context information to create persistence-ready records. + * + *

Example usage: + * + *

{@code
+ * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
+ *     .catalogId(catalog.getId())
+ *     .catalogName(catalog.getName())
+ *     .namespace(namespace.toString())
+ *     .tableName(tableName)
+ *     .build();
+ * }
+ */ +public final class MetricsRecordConverter { + + private MetricsRecordConverter() { + // Utility class + } + + /** + * Creates a builder for converting a ScanReport to a ScanMetricsRecord. + * + * @param scanReport the Iceberg scan report + * @return builder for configuring the conversion + */ + public static ScanReportBuilder forScanReport(ScanReport scanReport) { + return new ScanReportBuilder(scanReport); + } + + /** + * Creates a builder for converting a CommitReport to a CommitMetricsRecord. + * + * @param commitReport the Iceberg commit report + * @return builder for configuring the conversion + */ + public static CommitReportBuilder forCommitReport(CommitReport commitReport) { + return new CommitReportBuilder(commitReport); + } + + /** Builder for converting ScanReport to ScanMetricsRecord. */ + public static final class ScanReportBuilder { + private final ScanReport scanReport; + private long catalogId; + private String catalogName; + private String namespace; + private String tableName; + + private ScanReportBuilder(ScanReport scanReport) { + this.scanReport = scanReport; + } + + public ScanReportBuilder catalogId(long catalogId) { + this.catalogId = catalogId; + return this; + } + + public ScanReportBuilder catalogName(String catalogName) { + this.catalogName = catalogName; + return this; + } + + public ScanReportBuilder namespace(String namespace) { + this.namespace = namespace; + return this; + } + + public ScanReportBuilder tableName(String tableName) { + this.tableName = tableName; + return this; + } + + public ScanMetricsRecord build() { + ScanMetricsResult metrics = scanReport.scanMetrics(); + Map reportMetadata = + scanReport.metadata() != null ? scanReport.metadata() : Collections.emptyMap(); + + return ScanMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .timestamp(Instant.now()) + .snapshotId(Optional.of(scanReport.snapshotId())) + .schemaId(Optional.of(scanReport.schemaId())) + .filterExpression( + scanReport.filter() != null + ? Optional.of(scanReport.filter().toString()) + : Optional.empty()) + .projectedFieldIds( + scanReport.projectedFieldIds() != null + ? scanReport.projectedFieldIds() + : Collections.emptyList()) + .projectedFieldNames( + scanReport.projectedFieldNames() != null + ? scanReport.projectedFieldNames() + : Collections.emptyList()) + .resultDataFiles(getCounterValue(metrics.resultDataFiles())) + .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) + .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) + .totalDataManifests(getCounterValue(metrics.totalDataManifests())) + .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) + .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) + .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) + .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) + .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) + .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) + .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) + .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) + .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) + .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) + .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) + .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) + .metadata(reportMetadata) + .build(); + } + } + + /** Builder for converting CommitReport to CommitMetricsRecord. */ + public static final class CommitReportBuilder { + private final CommitReport commitReport; + private long catalogId; + private String catalogName; + private String namespace; + private String tableName; + + private CommitReportBuilder(CommitReport commitReport) { + this.commitReport = commitReport; + } + + public CommitReportBuilder catalogId(long catalogId) { + this.catalogId = catalogId; + return this; + } + + public CommitReportBuilder catalogName(String catalogName) { + this.catalogName = catalogName; + return this; + } + + public CommitReportBuilder namespace(String namespace) { + this.namespace = namespace; + return this; + } + + public CommitReportBuilder tableName(String tableName) { + this.tableName = tableName; + return this; + } + + public CommitMetricsRecord build() { + CommitMetricsResult metrics = commitReport.commitMetrics(); + Map reportMetadata = + commitReport.metadata() != null ? commitReport.metadata() : Collections.emptyMap(); + + return CommitMetricsRecord.builder() + .reportId(UUID.randomUUID().toString()) + .catalogId(catalogId) + .catalogName(catalogName) + .namespace(namespace) + .tableName(tableName) + .timestamp(Instant.now()) + .snapshotId(commitReport.snapshotId()) + .sequenceNumber(Optional.of(commitReport.sequenceNumber())) + .operation(commitReport.operation()) + .addedDataFiles(getCounterValue(metrics.addedDataFiles())) + .removedDataFiles(getCounterValue(metrics.removedDataFiles())) + .totalDataFiles(getCounterValue(metrics.totalDataFiles())) + .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) + .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) + .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) + .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) + .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) + .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) + .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) + .addedRecords(getCounterValue(metrics.addedRecords())) + .removedRecords(getCounterValue(metrics.removedRecords())) + .totalRecords(getCounterValue(metrics.totalRecords())) + .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) + .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) + .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) + .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) + .attempts(getCounterValueInt(metrics.attempts())) + .metadata(reportMetadata) + .build(); + } + } + + // === Helper Methods === + + private static long getCounterValue(CounterResult counter) { + if (counter == null) { + return 0L; + } + return counter.value(); + } + + private static int getCounterValueInt(CounterResult counter) { + if (counter == null) { + return 0; + } + return (int) counter.value(); + } + + private static long getTimerValueMs(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return 0L; + } + return timer.totalDuration().toMillis(); + } + + private static Optional getTimerValueMsOpt(TimerResult timer) { + if (timer == null || timer.totalDuration() == null) { + return Optional.empty(); + } + return Optional.of(timer.totalDuration().toMillis()); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 9d41db8173..2986beb0fb 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -18,8 +18,6 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.time.Instant; -import java.util.Map; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -27,50 +25,15 @@ * Backend-agnostic representation of an Iceberg commit metrics report. * *

This record captures all relevant metrics from an Iceberg {@code CommitReport} along with - * contextual information such as realm, catalog, and request correlation data. + * contextual information such as catalog identification and table location. + * + *

Common identification fields are inherited from {@link MetricsRecordIdentity}. + * + *

Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained + * from the CDI-injected {@code RealmContext} at persistence time. */ @PolarisImmutable -public interface CommitMetricsRecord { - - // === Identification === - - /** Unique identifier for this report (UUID). */ - String reportId(); - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** Table name. */ - String tableName(); - - // === Timing === - - /** Timestamp when the report was received. */ - Instant timestamp(); - - // === Client Correlation === - - /** - * Client-provided trace ID from the metrics report metadata. - * - *

This is an optional identifier that the Iceberg client may include in the report's metadata - * map (typically under the key "trace-id"). It allows clients to correlate this metrics report - * with their own distributed tracing system or query execution context. - * - *

Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal - * information are not included in this record. The persistence implementation can obtain these - * from the ambient request context (OTel context, security context) at write time if needed. - */ - Optional reportTraceId(); +public interface CommitMetricsRecord extends MetricsRecordIdentity { // === Commit Context === @@ -147,11 +110,6 @@ public interface CommitMetricsRecord { /** Number of commit attempts. */ int attempts(); - // === Extensibility === - - /** Additional metadata as key-value pairs. */ - Map metadata(); - /** * Creates a new builder for CommitMetricsRecord. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java deleted file mode 100644 index 38432c12c2..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsContext.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.persistence.metrics; - -import org.apache.polaris.immutables.PolarisImmutable; - -/** - * Context information needed when converting Iceberg metrics reports to persistence records. - * - *

This context captures information from the request environment that is not available in the - * Iceberg report itself, such as realm and catalog identification. - * - *

Note: Principal and tracing information (e.g., OpenTelemetry trace/span IDs) are not included - * in this context. The persistence implementation can obtain these from the ambient request context - * (OTel context, security context) at write time if needed. - */ -@PolarisImmutable -public interface MetricsContext { - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** - * Creates a new builder for MetricsContext. - * - * @return a new builder instance - */ - static ImmutableMetricsContext.Builder builder() { - return ImmutableMetricsContext.builder(); - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index 346cc77d01..ae7e6f7ec0 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -30,7 +30,33 @@ * remain backend-agnostic. * *

Implementations should be idempotent - writing the same reportId twice should have no effect. - * Implementations that don't support metrics persistence should return {@link #NOOP}. + * Implementations that don't support metrics persistence can use {@link #NOOP} which silently + * ignores write operations and returns empty pages for queries. + * + *

Dependency Injection

+ * + *

This interface is designed to be injected via CDI (Contexts and Dependency Injection). The + * deployment module (e.g., {@code polaris-quarkus-service}) should provide a {@code @Produces} + * method that creates the appropriate implementation based on the configured persistence backend. + * + *

Example producer: + * + *

{@code
+ * @Produces
+ * @RequestScoped
+ * MetricsPersistence metricsPersistence(RealmContext realmContext, PersistenceBackend backend) {
+ *   if (backend.supportsMetrics()) {
+ *     return backend.createMetricsPersistence(realmContext);
+ *   }
+ *   return MetricsPersistence.NOOP;
+ * }
+ * }
+ * + *

Multi-Tenancy

+ * + *

Realm context is not passed in the record objects. Implementations should obtain the realm + * from the CDI-injected {@code RealmContext} at write/query time. This keeps catalog-specific code + * from needing to manage realm concerns directly. * *

Pagination

* @@ -43,8 +69,8 @@ *
  • Efficient cursor-based pagination that works with large result sets * * - *

    The {@link ReportIdToken} provides a cursor based on the report ID (UUID), but backends may - * use other cursor strategies internally. + *

    The {@link ReportIdToken} provides a reference cursor implementation based on report ID + * (UUID), but backends may use other cursor strategies internally. * * @see PageToken * @see Page @@ -55,21 +81,6 @@ public interface MetricsPersistence { /** A no-op implementation for backends that don't support metrics persistence. */ MetricsPersistence NOOP = new NoOpMetricsPersistence(); - // ============================================================================ - // Capability Detection - // ============================================================================ - - /** - * Returns whether this persistence backend supports metrics storage. - * - *

    Backends that do not support metrics should return false. Service code should NOT use this - * to branch with instanceof checks - instead, call the interface methods directly and rely on the - * no-op behavior for unsupported backends. - * - * @return true if metrics persistence is supported, false otherwise - */ - boolean isSupported(); - // ============================================================================ // Write Operations // ============================================================================ @@ -77,8 +88,7 @@ public interface MetricsPersistence { /** * Persists a scan metrics record. * - *

    This operation is idempotent - writing the same reportId twice has no effect. If {@link - * #isSupported()} returns false, this is a no-op. + *

    This operation is idempotent - writing the same reportId twice has no effect. * * @param record the scan metrics record to persist */ @@ -87,8 +97,7 @@ public interface MetricsPersistence { /** * Persists a commit metrics record. * - *

    This operation is idempotent - writing the same reportId twice has no effect. If {@link - * #isSupported()} returns false, this is a no-op. + *

    This operation is idempotent - writing the same reportId twice has no effect. * * @param record the commit metrics record to persist */ @@ -101,8 +110,6 @@ public interface MetricsPersistence { /** * Queries scan metrics reports based on the specified criteria. * - *

    Returns an empty page if {@link #isSupported()} returns false. - * *

    Example usage: * *

    {@code
    @@ -129,8 +136,6 @@ Page queryScanReports(
       /**
        * Queries commit metrics reports based on the specified criteria.
        *
    -   * 

    Returns an empty page if {@link #isSupported()} returns false. - * * @param criteria the query criteria (filters) * @param pageToken pagination parameters (page size and optional cursor) * @return page of matching commit metrics records with continuation token if more results exist diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java deleted file mode 100644 index 90a56e1ec3..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistenceFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.persistence.metrics; - -import org.apache.polaris.core.context.RealmContext; - -/** - * Factory interface for creating {@link MetricsPersistence} instances. - * - *

    Implementations may cache instances per realm for efficiency. For backends that don't support - * metrics persistence, implementations should return {@link MetricsPersistence#NOOP}. - */ -public interface MetricsPersistenceFactory { - - /** - * Gets or creates a {@link MetricsPersistence} instance for the given realm. - * - *

    Implementations may cache instances per realm. If the persistence backend does not support - * metrics persistence, this method should return {@link MetricsPersistence#NOOP}. - * - * @param realmContext the realm context - * @return a MetricsPersistence instance for the realm, or NOOP if not supported - */ - MetricsPersistence getOrCreateMetricsPersistence(RealmContext realmContext); -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index d1989c68d2..98d00d742c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -40,10 +40,13 @@ * * * - * * *
    PatternFields UsedIndex Required
    By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
    By Client Trace IDreportTraceIdNo (custom deployment)
    By Time OnlystartTime, endTimePartial (timestamp index)
    * + *

    Additional query patterns (e.g., by trace ID) can be implemented by persistence backends using + * the {@link #metadata()} filter map. Client-provided correlation data should be stored in the + * metrics record's metadata map and can be filtered using the metadata criteria. + * *

    Pagination

    * *

    Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} @@ -83,18 +86,20 @@ public interface MetricsQueryCriteria { /** End time for the query (exclusive). */ Optional endTime(); - // === Correlation === + // === Metadata Filtering === /** - * Client-provided trace ID to filter by (from report metadata). + * Metadata key-value pairs to filter by. * - *

    This matches the {@code reportTraceId} field in the metrics records, which originates from - * the client's metadata map. Useful for correlating metrics with client-side query execution. + *

    This enables filtering metrics by client-provided correlation data stored in the record's + * metadata map. For example, clients may include a trace ID in the metadata that can be queried + * later. * - *

    Note: This query pattern may require a custom index in deployment environments. The OSS - * codebase does not include an index for trace-based queries. + *

    Note: Metadata filtering may require custom indexes depending on the persistence backend. + * The OSS codebase provides basic support, but performance optimizations may be needed for + * high-volume deployments. */ - Optional reportTraceId(); + java.util.Map metadata(); // === Factory Methods === @@ -130,18 +135,6 @@ static MetricsQueryCriteria forTable( .build(); } - /** - * Creates criteria for querying by client-provided trace ID. - * - *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. - * - * @param reportTraceId the client trace ID to search for - * @return the query criteria - */ - static MetricsQueryCriteria forReportTraceId(String reportTraceId) { - return builder().reportTraceId(reportTraceId).build(); - } - /** * Creates empty criteria (no filters). Useful for pagination-only queries. * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java deleted file mode 100644 index 0cb740b8cf..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordConverter.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.persistence.metrics; - -import java.time.Instant; -import java.util.Collections; -import java.util.Optional; -import java.util.UUID; -import org.apache.iceberg.metrics.CommitMetricsResult; -import org.apache.iceberg.metrics.CommitReport; -import org.apache.iceberg.metrics.CounterResult; -import org.apache.iceberg.metrics.ScanMetricsResult; -import org.apache.iceberg.metrics.ScanReport; -import org.apache.iceberg.metrics.TimerResult; - -/** - * Utility class for converting Iceberg metrics reports to SPI record types. - * - *

    This converter extracts all relevant metrics from Iceberg's {@link ScanReport} and {@link - * CommitReport} and combines them with context information to create persistence-ready records. - */ -public final class MetricsRecordConverter { - - private MetricsRecordConverter() { - // Utility class - } - - /** - * Converts an Iceberg ScanReport to a ScanMetricsRecord. - * - * @param scanReport the Iceberg scan report - * @param tableName the table name - * @param context the metrics context containing realm, catalog, and request information - * @return the scan metrics record ready for persistence - */ - public static ScanMetricsRecord fromScanReport( - ScanReport scanReport, String tableName, MetricsContext context) { - ScanMetricsResult metrics = scanReport.scanMetrics(); - - return ScanMetricsRecord.builder() - .reportId(UUID.randomUUID().toString()) - .realmId(context.realmId()) - .catalogId(context.catalogId()) - .catalogName(context.catalogName()) - .namespace(context.namespace()) - .tableName(tableName) - .timestamp(Instant.now()) - .reportTraceId(getMetadataValue(scanReport.metadata(), "trace-id")) - .snapshotId(Optional.of(scanReport.snapshotId())) - .schemaId(Optional.of(scanReport.schemaId())) - .filterExpression( - scanReport.filter() != null - ? Optional.of(scanReport.filter().toString()) - : Optional.empty()) - .projectedFieldIds( - scanReport.projectedFieldIds() != null - ? scanReport.projectedFieldIds() - : Collections.emptyList()) - .projectedFieldNames( - scanReport.projectedFieldNames() != null - ? scanReport.projectedFieldNames() - : Collections.emptyList()) - .resultDataFiles(getCounterValue(metrics.resultDataFiles())) - .resultDeleteFiles(getCounterValue(metrics.resultDeleteFiles())) - .totalFileSizeBytes(getCounterValue(metrics.totalFileSizeInBytes())) - .totalDataManifests(getCounterValue(metrics.totalDataManifests())) - .totalDeleteManifests(getCounterValue(metrics.totalDeleteManifests())) - .scannedDataManifests(getCounterValue(metrics.scannedDataManifests())) - .scannedDeleteManifests(getCounterValue(metrics.scannedDeleteManifests())) - .skippedDataManifests(getCounterValue(metrics.skippedDataManifests())) - .skippedDeleteManifests(getCounterValue(metrics.skippedDeleteManifests())) - .skippedDataFiles(getCounterValue(metrics.skippedDataFiles())) - .skippedDeleteFiles(getCounterValue(metrics.skippedDeleteFiles())) - .totalPlanningDurationMs(getTimerValueMs(metrics.totalPlanningDuration())) - .equalityDeleteFiles(getCounterValue(metrics.equalityDeleteFiles())) - .positionalDeleteFiles(getCounterValue(metrics.positionalDeleteFiles())) - .indexedDeleteFiles(getCounterValue(metrics.indexedDeleteFiles())) - .totalDeleteFileSizeBytes(getCounterValue(metrics.totalDeleteFileSizeInBytes())) - .metadata(Collections.emptyMap()) - .build(); - } - - /** - * Converts an Iceberg CommitReport to a CommitMetricsRecord. - * - * @param commitReport the Iceberg commit report - * @param tableName the table name - * @param context the metrics context containing realm, catalog, and request information - * @return the commit metrics record ready for persistence - */ - public static CommitMetricsRecord fromCommitReport( - CommitReport commitReport, String tableName, MetricsContext context) { - CommitMetricsResult metrics = commitReport.commitMetrics(); - - return CommitMetricsRecord.builder() - .reportId(UUID.randomUUID().toString()) - .realmId(context.realmId()) - .catalogId(context.catalogId()) - .catalogName(context.catalogName()) - .namespace(context.namespace()) - .tableName(tableName) - .timestamp(Instant.now()) - .reportTraceId(getMetadataValue(commitReport.metadata(), "trace-id")) - .snapshotId(commitReport.snapshotId()) - .sequenceNumber(Optional.of(commitReport.sequenceNumber())) - .operation(commitReport.operation()) - .addedDataFiles(getCounterValue(metrics.addedDataFiles())) - .removedDataFiles(getCounterValue(metrics.removedDataFiles())) - .totalDataFiles(getCounterValue(metrics.totalDataFiles())) - .addedDeleteFiles(getCounterValue(metrics.addedDeleteFiles())) - .removedDeleteFiles(getCounterValue(metrics.removedDeleteFiles())) - .totalDeleteFiles(getCounterValue(metrics.totalDeleteFiles())) - .addedEqualityDeleteFiles(getCounterValue(metrics.addedEqualityDeleteFiles())) - .removedEqualityDeleteFiles(getCounterValue(metrics.removedEqualityDeleteFiles())) - .addedPositionalDeleteFiles(getCounterValue(metrics.addedPositionalDeleteFiles())) - .removedPositionalDeleteFiles(getCounterValue(metrics.removedPositionalDeleteFiles())) - .addedRecords(getCounterValue(metrics.addedRecords())) - .removedRecords(getCounterValue(metrics.removedRecords())) - .totalRecords(getCounterValue(metrics.totalRecords())) - .addedFileSizeBytes(getCounterValue(metrics.addedFilesSizeInBytes())) - .removedFileSizeBytes(getCounterValue(metrics.removedFilesSizeInBytes())) - .totalFileSizeBytes(getCounterValue(metrics.totalFilesSizeInBytes())) - .totalDurationMs(getTimerValueMsOpt(metrics.totalDuration())) - .attempts(getCounterValueInt(metrics.attempts())) - .metadata(Collections.emptyMap()) - .build(); - } - - private static long getCounterValue(CounterResult counter) { - if (counter == null) { - return 0L; - } - return counter.value(); - } - - private static int getCounterValueInt(CounterResult counter) { - if (counter == null) { - return 0; - } - return (int) counter.value(); - } - - private static long getTimerValueMs(TimerResult timer) { - if (timer == null || timer.totalDuration() == null) { - return 0L; - } - return timer.totalDuration().toMillis(); - } - - private static Optional getTimerValueMsOpt(TimerResult timer) { - if (timer == null || timer.totalDuration() == null) { - return Optional.empty(); - } - return Optional.of(timer.totalDuration().toMillis()); - } - - private static Optional getMetadataValue( - java.util.Map metadata, String key) { - if (metadata == null) { - return Optional.empty(); - } - return Optional.ofNullable(metadata.get(key)); - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java new file mode 100644 index 0000000000..db8ac1325d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.persistence.metrics; + +import java.time.Instant; +import java.util.Map; + +/** + * Base interface containing common identification fields shared by all metrics records. + * + *

    This interface defines the common fields that identify the source of a metrics report, + * including the report ID, catalog information, namespace, table name, timestamp, and metadata. + * + *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to + * inherit these common fields while adding their own specific metrics. + * + *

    Note: Realm ID is intentionally not included in this interface. Multi-tenancy realm context + * should be obtained from the CDI-injected {@code RealmContext} at persistence time. This keeps + * catalog-specific code from needing to manage realm concerns. + */ +public interface MetricsRecordIdentity { + + /** + * Unique identifier for this report (UUID). + * + *

    This ID is generated when the record is created and serves as the primary key for the + * metrics record in persistence storage. + */ + String reportId(); + + /** + * Internal catalog ID. + * + *

    This matches the catalog entity ID in Polaris persistence, as defined by {@code + * PolarisEntityCore#getId()}. + */ + long catalogId(); + + /** + * Human-readable catalog name. + * + *

    The catalog name as known to clients. This is stored alongside the ID for query convenience + * and display purposes. + */ + String catalogName(); + + /** + * Dot-separated namespace path (e.g., "db.schema"). + * + *

    The namespace containing the table for which metrics are reported. + */ + String namespace(); + + /** + * Table name. + * + *

    The name of the table for which metrics are reported. + */ + String tableName(); + + /** + * Timestamp when the report was received. + * + *

    This is the server-side timestamp when the metrics report was processed, not the client-side + * timestamp when the operation occurred. + */ + Instant timestamp(); + + /** + * Additional metadata as key-value pairs. + * + *

    This map can contain additional contextual information from the original Iceberg report, + * including client-provided trace IDs or other correlation data. Persistence implementations can + * store and index specific metadata fields as needed. + */ + Map metadata(); +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java index 56bd435ccf..b33c095dc8 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/NoOpMetricsPersistence.java @@ -34,11 +34,6 @@ final class NoOpMetricsPersistence implements MetricsPersistence { NoOpMetricsPersistence() {} - @Override - public boolean isSupported() { - return false; - } - @Override public void writeScanReport(@Nonnull ScanMetricsRecord record) { // No-op diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java index fb15480a0c..c4e4ec6320 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -28,6 +28,15 @@ /** * Pagination {@linkplain Token token} for metrics queries, backed by the report ID (UUID). * + *

    Note: This is a reference implementation provided for convenience. It is + * not required by the {@link MetricsPersistence} SPI contract. Persistence backends are + * free to implement their own {@link Token} subclass optimized for their storage model (e.g., + * timestamp-based cursors, composite keys, continuation tokens). + * + *

    Only {@link org.apache.polaris.core.persistence.pagination.PageToken} (for requests) and + * {@link org.apache.polaris.core.persistence.pagination.Page} (for responses) are required by the + * SPI contract. + * *

    This token enables cursor-based pagination for metrics queries across different storage * backends. The report ID is used as the cursor because it is: * diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index 5e85566c14..b9fd79ec29 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -18,9 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; -import java.time.Instant; import java.util.List; -import java.util.Map; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -28,50 +26,15 @@ * Backend-agnostic representation of an Iceberg scan metrics report. * *

    This record captures all relevant metrics from an Iceberg {@code ScanReport} along with - * contextual information such as realm, catalog, and request correlation data. + * contextual information such as catalog identification and table location. + * + *

    Common identification fields are inherited from {@link MetricsRecordIdentity}. + * + *

    Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained + * from the CDI-injected {@code RealmContext} at persistence time. */ @PolarisImmutable -public interface ScanMetricsRecord { - - // === Identification === - - /** Unique identifier for this report (UUID). */ - String reportId(); - - /** Multi-tenancy realm identifier. */ - String realmId(); - - /** Internal catalog ID. */ - String catalogId(); - - /** Human-readable catalog name. */ - String catalogName(); - - /** Dot-separated namespace path (e.g., "db.schema"). */ - String namespace(); - - /** Table name. */ - String tableName(); - - // === Timing === - - /** Timestamp when the report was received. */ - Instant timestamp(); - - // === Client Correlation === - - /** - * Client-provided trace ID from the metrics report metadata. - * - *

    This is an optional identifier that the Iceberg client may include in the report's metadata - * map (typically under the key "trace-id"). It allows clients to correlate this metrics report - * with their own distributed tracing system or query execution context. - * - *

    Note: Server-side tracing information (e.g., OpenTelemetry trace/span IDs) and principal - * information are not included in this record. The persistence implementation can obtain these - * from the ambient request context (OTel context, security context) at write time if needed. - */ - Optional reportTraceId(); +public interface ScanMetricsRecord extends MetricsRecordIdentity { // === Scan Context === @@ -146,11 +109,6 @@ public interface ScanMetricsRecord { /** Total size of delete files in bytes. */ long totalDeleteFileSizeBytes(); - // === Extensibility === - - /** Additional metadata as key-value pairs. */ - Map metadata(); - /** * Creates a new builder for ScanMetricsRecord. * From 9ec22eaf97db88565cd7b393d24009a54498b1dc Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Tue, 3 Feb 2026 18:48:56 -0800 Subject: [PATCH 05/12] Review comments --- .../iceberg/MetricsRecordConverter.java | 46 +++++++++---------- .../metrics/MetricsRecordIdentity.java | 44 ++++++++++++------ 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index e9d13f36d0..1ce754851f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Optional; import java.util.UUID; +import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.metrics.CounterResult; @@ -44,8 +45,7 @@ * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) * .catalogId(catalog.getId()) * .catalogName(catalog.getName()) - * .namespace(namespace.toString()) - * .tableName(tableName) + * .tableIdentifier(TableIdentifier.of(namespace, tableName)) * .build(); * }

    */ @@ -80,8 +80,7 @@ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; private String catalogName; - private String namespace; - private String tableName; + private TableIdentifier tableIdentifier; private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -97,13 +96,14 @@ public ScanReportBuilder catalogName(String catalogName) { return this; } - public ScanReportBuilder namespace(String namespace) { - this.namespace = namespace; - return this; - } - - public ScanReportBuilder tableName(String tableName) { - this.tableName = tableName; + /** + * Sets the table identifier including namespace and table name. + * + * @param tableIdentifier the Iceberg table identifier + * @return this builder + */ + public ScanReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { + this.tableIdentifier = tableIdentifier; return this; } @@ -116,8 +116,7 @@ public ScanMetricsRecord build() { .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) .catalogName(catalogName) - .namespace(namespace) - .tableName(tableName) + .tableIdentifier(tableIdentifier) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -159,8 +158,7 @@ public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; private String catalogName; - private String namespace; - private String tableName; + private TableIdentifier tableIdentifier; private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -176,13 +174,14 @@ public CommitReportBuilder catalogName(String catalogName) { return this; } - public CommitReportBuilder namespace(String namespace) { - this.namespace = namespace; - return this; - } - - public CommitReportBuilder tableName(String tableName) { - this.tableName = tableName; + /** + * Sets the table identifier including namespace and table name. + * + * @param tableIdentifier the Iceberg table identifier + * @return this builder + */ + public CommitReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { + this.tableIdentifier = tableIdentifier; return this; } @@ -195,8 +194,7 @@ public CommitMetricsRecord build() { .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) .catalogName(catalogName) - .namespace(namespace) - .tableName(tableName) + .tableIdentifier(tableIdentifier) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index db8ac1325d..498a201ab6 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -20,19 +20,39 @@ import java.time.Instant; import java.util.Map; +import org.apache.iceberg.catalog.TableIdentifier; /** * Base interface containing common identification fields shared by all metrics records. * *

    This interface defines the common fields that identify the source of a metrics report, - * including the report ID, catalog information, namespace, table name, timestamp, and metadata. + * including the report ID, catalog information, table identifier, timestamp, and metadata. * *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to * inherit these common fields while adding their own specific metrics. * - *

    Note: Realm ID is intentionally not included in this interface. Multi-tenancy realm context - * should be obtained from the CDI-injected {@code RealmContext} at persistence time. This keeps - * catalog-specific code from needing to manage realm concerns. + *

    Design Decisions

    + * + *

    TableIdentifier vs separate namespace/tableName: We use Iceberg's {@link + * TableIdentifier} which encapsulates both namespace and table name. This aligns with how Iceberg + * reports identify tables and is consistent with Polaris entity patterns (e.g., {@code + * TableLikeEntity.getTableIdentifier()}). + * + *

    Catalog ID/Name vs CatalogEntity: We use separate primitive fields for catalog ID and + * name rather than {@code CatalogEntity} because: + * + *

      + *
    • {@code CatalogEntity} is a heavyweight object containing storage config, properties, and + * other data not relevant for metrics identification + *
    • {@code CatalogEntity} is not an Immutables-compatible interface, making it difficult to + * include in {@code @PolarisImmutable} generated classes + *
    • For metrics, we only need the catalog ID (for foreign key relationships) and name (for + * display/query convenience) + *
    + * + *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm + * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This + * keeps catalog-specific code from needing to manage realm concerns. */ public interface MetricsRecordIdentity { @@ -61,18 +81,16 @@ public interface MetricsRecordIdentity { String catalogName(); /** - * Dot-separated namespace path (e.g., "db.schema"). + * Table identifier including namespace and table name. * - *

    The namespace containing the table for which metrics are reported. - */ - String namespace(); - - /** - * Table name. + *

    This uses Iceberg's {@link TableIdentifier} which encapsulates both the namespace path and + * the table name. The namespace can be accessed via {@link TableIdentifier#namespace()} and the + * table name via {@link TableIdentifier#name()}. * - *

    The name of the table for which metrics are reported. + *

    Example: For a table "my_table" in namespace "db.schema", use {@code + * TableIdentifier.of(Namespace.of("db", "schema"), "my_table")}. */ - String tableName(); + TableIdentifier tableIdentifier(); /** * Timestamp when the report was received. From b66a0d6e1737bc810c431ee75484bfe696dad72f Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 14:54:44 -0800 Subject: [PATCH 06/12] refactor: Remove TableIdentifier and catalogName from SPI records Per reviewer feedback: - Replace Iceberg's TableIdentifier with separate namespace/tableName strings in MetricsRecordIdentity to avoid Iceberg dependencies in Polaris SPI - Remove catalogName from records, keep only catalogId since catalog names can change over time (via rename operations) - Update MetricsQueryCriteria to use catalogId (OptionalLong) instead of catalogName - Update MetricsRecordConverter to extract namespace/tableName from TableIdentifier The service layer (MetricsRecordConverter) still accepts TableIdentifier and performs the conversion to primitives for the SPI records. --- .../iceberg/MetricsRecordConverter.java | 37 ++++++++------- .../metrics/MetricsQueryCriteria.java | 17 ++++--- .../metrics/MetricsRecordIdentity.java | 46 +++++++------------ 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 1ce754851f..5c44ed6c38 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -44,7 +44,6 @@ *

    {@code
      * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
      *     .catalogId(catalog.getId())
    - *     .catalogName(catalog.getName())
      *     .tableIdentifier(TableIdentifier.of(namespace, tableName))
      *     .build();
      * }
    @@ -75,11 +74,20 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { return new CommitReportBuilder(commitReport); } + /** + * Converts a TableIdentifier namespace to a dot-separated string. + * + * @param tableIdentifier the Iceberg table identifier + * @return dot-separated namespace string + */ + private static String namespaceToString(TableIdentifier tableIdentifier) { + return String.join(".", tableIdentifier.namespace().levels()); + } + /** Builder for converting ScanReport to ScanMetricsRecord. */ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; - private String catalogName; private TableIdentifier tableIdentifier; private ScanReportBuilder(ScanReport scanReport) { @@ -91,14 +99,12 @@ public ScanReportBuilder catalogId(long catalogId) { return this; } - public ScanReportBuilder catalogName(String catalogName) { - this.catalogName = catalogName; - return this; - } - /** * Sets the table identifier including namespace and table name. * + *

    The namespace and table name will be extracted from the TableIdentifier and stored as + * separate primitive fields in the SPI record. + * * @param tableIdentifier the Iceberg table identifier * @return this builder */ @@ -115,8 +121,8 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .catalogName(catalogName) - .tableIdentifier(tableIdentifier) + .namespace(namespaceToString(tableIdentifier)) + .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -157,7 +163,6 @@ public ScanMetricsRecord build() { public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; - private String catalogName; private TableIdentifier tableIdentifier; private CommitReportBuilder(CommitReport commitReport) { @@ -169,14 +174,12 @@ public CommitReportBuilder catalogId(long catalogId) { return this; } - public CommitReportBuilder catalogName(String catalogName) { - this.catalogName = catalogName; - return this; - } - /** * Sets the table identifier including namespace and table name. * + *

    The namespace and table name will be extracted from the TableIdentifier and stored as + * separate primitive fields in the SPI record. + * * @param tableIdentifier the Iceberg table identifier * @return this builder */ @@ -193,8 +196,8 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .catalogName(catalogName) - .tableIdentifier(tableIdentifier) + .namespace(namespaceToString(tableIdentifier)) + .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 98d00d742c..56afe9f264 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -39,7 +39,7 @@ * * * - * + * * *
    PatternFields UsedIndex Required
    By Table + TimecatalogName, namespace, tableName, startTime, endTimeYes (OSS)
    By Table + TimecatalogId, namespace, tableName, startTime, endTimeYes (OSS)
    By Time OnlystartTime, endTimePartial (timestamp index)
    * @@ -69,8 +69,13 @@ public interface MetricsQueryCriteria { // === Table Identification (optional) === - /** Catalog name to filter by. */ - Optional catalogName(); + /** + * Catalog ID to filter by. + * + *

    This is the internal catalog entity ID. Callers should resolve catalog names to IDs before + * querying, as catalog names can change over time. + */ + java.util.OptionalLong catalogId(); /** Namespace to filter by (dot-separated). */ Optional namespace(); @@ -117,7 +122,7 @@ static ImmutableMetricsQueryCriteria.Builder builder() { * *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. * - * @param catalogName the catalog name + * @param catalogId the catalog entity ID * @param namespace the namespace (dot-separated) * @param tableName the table name * @param startTime the start time (inclusive) @@ -125,9 +130,9 @@ static ImmutableMetricsQueryCriteria.Builder builder() { * @return the query criteria */ static MetricsQueryCriteria forTable( - String catalogName, String namespace, String tableName, Instant startTime, Instant endTime) { + long catalogId, String namespace, String tableName, Instant startTime, Instant endTime) { return builder() - .catalogName(catalogName) + .catalogId(catalogId) .namespace(namespace) .tableName(tableName) .startTime(startTime) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 498a201ab6..574ae77eb9 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -20,35 +20,25 @@ import java.time.Instant; import java.util.Map; -import org.apache.iceberg.catalog.TableIdentifier; /** * Base interface containing common identification fields shared by all metrics records. * *

    This interface defines the common fields that identify the source of a metrics report, - * including the report ID, catalog information, table identifier, timestamp, and metadata. + * including the report ID, catalog ID, table location, timestamp, and metadata. * *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to * inherit these common fields while adding their own specific metrics. * *

    Design Decisions

    * - *

    TableIdentifier vs separate namespace/tableName: We use Iceberg's {@link - * TableIdentifier} which encapsulates both namespace and table name. This aligns with how Iceberg - * reports identify tables and is consistent with Polaris entity patterns (e.g., {@code - * TableLikeEntity.getTableIdentifier()}). + *

    Namespace/TableName as primitives: We use separate String fields for namespace and + * table name rather than Iceberg's {@code TableIdentifier} to avoid Iceberg dependencies in the + * Polaris SPI. The service layer can convert to/from {@code TableIdentifier} as needed. * - *

    Catalog ID/Name vs CatalogEntity: We use separate primitive fields for catalog ID and - * name rather than {@code CatalogEntity} because: - * - *

      - *
    • {@code CatalogEntity} is a heavyweight object containing storage config, properties, and - * other data not relevant for metrics identification - *
    • {@code CatalogEntity} is not an Immutables-compatible interface, making it difficult to - * include in {@code @PolarisImmutable} generated classes - *
    • For metrics, we only need the catalog ID (for foreign key relationships) and name (for - * display/query convenience) - *
    + *

    Catalog ID only (no name): We store only the catalog ID, not the catalog name. Catalog + * names can change over time (via rename operations), which would make querying historical metrics + * by name challenging. Queries should resolve catalog names to IDs using the current catalog state. * *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This @@ -68,29 +58,25 @@ public interface MetricsRecordIdentity { * Internal catalog ID. * *

    This matches the catalog entity ID in Polaris persistence, as defined by {@code - * PolarisEntityCore#getId()}. + * PolarisEntityCore#getId()}. The catalog name is not stored since it can change over time; + * queries should resolve names to IDs using the current catalog state. */ long catalogId(); /** - * Human-readable catalog name. + * Namespace path as a dot-separated string (e.g., "db.schema"). * - *

    The catalog name as known to clients. This is stored alongside the ID for query convenience - * and display purposes. + *

    This is the namespace portion of the table identifier. Multi-level namespaces are + * represented with dots separating levels. */ - String catalogName(); + String namespace(); /** - * Table identifier including namespace and table name. - * - *

    This uses Iceberg's {@link TableIdentifier} which encapsulates both the namespace path and - * the table name. The namespace can be accessed via {@link TableIdentifier#namespace()} and the - * table name via {@link TableIdentifier#name()}. + * Table name. * - *

    Example: For a table "my_table" in namespace "db.schema", use {@code - * TableIdentifier.of(Namespace.of("db", "schema"), "my_table")}. + *

    This is the table name portion of the table identifier, without the namespace prefix. */ - TableIdentifier tableIdentifier(); + String tableName(); /** * Timestamp when the report was received. From 7ac814ee969269ff7282861d7d3336c3bb654986 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:02:34 -0800 Subject: [PATCH 07/12] refactor: Use List for namespace instead of dot-separated string Per reviewer feedback, namespace is now represented as a List of individual levels rather than a dot-separated string. This avoids ambiguity when namespace segments contain dots. Changes: - MetricsRecordIdentity: namespace() now returns List - MetricsQueryCriteria: namespace() now returns List - MetricsRecordConverter: namespaceToList() converts Iceberg Namespace to List using Arrays.asList() The persistence implementation handles the serialization format. --- .../iceberg/MetricsRecordConverter.java | 14 ++++++----- .../metrics/MetricsQueryCriteria.java | 24 ++++++++++++++----- .../metrics/MetricsRecordIdentity.java | 10 ++++---- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 5c44ed6c38..71128394ae 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -19,7 +19,9 @@ package org.apache.polaris.core.metrics.iceberg; import java.time.Instant; +import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -75,13 +77,13 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { } /** - * Converts a TableIdentifier namespace to a dot-separated string. + * Converts a TableIdentifier namespace to a list of levels. * * @param tableIdentifier the Iceberg table identifier - * @return dot-separated namespace string + * @return namespace as a list of levels */ - private static String namespaceToString(TableIdentifier tableIdentifier) { - return String.join(".", tableIdentifier.namespace().levels()); + private static List namespaceToList(TableIdentifier tableIdentifier) { + return Arrays.asList(tableIdentifier.namespace().levels()); } /** Builder for converting ScanReport to ScanMetricsRecord. */ @@ -121,7 +123,7 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToString(tableIdentifier)) + .namespace(namespaceToList(tableIdentifier)) .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) @@ -196,7 +198,7 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToString(tableIdentifier)) + .namespace(namespaceToList(tableIdentifier)) .tableName(tableIdentifier.name()) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 56afe9f264..736e0624c1 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -19,7 +19,10 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; +import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import org.apache.polaris.immutables.PolarisImmutable; /** @@ -75,10 +78,15 @@ public interface MetricsQueryCriteria { *

    This is the internal catalog entity ID. Callers should resolve catalog names to IDs before * querying, as catalog names can change over time. */ - java.util.OptionalLong catalogId(); + OptionalLong catalogId(); - /** Namespace to filter by (dot-separated). */ - Optional namespace(); + /** + * Namespace to filter by. + * + *

    The namespace is represented as a list of levels to avoid ambiguity when segments contain + * dots. An empty list means no namespace filter is applied. + */ + List namespace(); /** Table name to filter by. */ Optional tableName(); @@ -104,7 +112,7 @@ public interface MetricsQueryCriteria { * The OSS codebase provides basic support, but performance optimizations may be needed for * high-volume deployments. */ - java.util.Map metadata(); + Map metadata(); // === Factory Methods === @@ -123,14 +131,18 @@ static ImmutableMetricsQueryCriteria.Builder builder() { *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. * * @param catalogId the catalog entity ID - * @param namespace the namespace (dot-separated) + * @param namespace the namespace as a list of levels * @param tableName the table name * @param startTime the start time (inclusive) * @param endTime the end time (exclusive) * @return the query criteria */ static MetricsQueryCriteria forTable( - long catalogId, String namespace, String tableName, Instant startTime, Instant endTime) { + long catalogId, + List namespace, + String tableName, + Instant startTime, + Instant endTime) { return builder() .catalogId(catalogId) .namespace(namespace) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 574ae77eb9..afbcd1bf4d 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -19,6 +19,7 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; +import java.util.List; import java.util.Map; /** @@ -64,12 +65,13 @@ public interface MetricsRecordIdentity { long catalogId(); /** - * Namespace path as a dot-separated string (e.g., "db.schema"). + * Namespace path as a list of levels (e.g., ["db", "schema"]). * - *

    This is the namespace portion of the table identifier. Multi-level namespaces are - * represented with dots separating levels. + *

    This is the namespace portion of the table identifier. Using a list avoids ambiguity when + * namespace segments contain dots. The persistence implementation handles the serialization + * format. */ - String namespace(); + List namespace(); /** * Table name. From 4a3bc9fbc0f9d5205a3def5f142a5a59c552e549 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 15:25:52 -0800 Subject: [PATCH 08/12] refactor: Use tableId instead of tableName in metrics records Per reviewer feedback: - r2766326028: Use table ID (same as catalog ID) since table names can change - r2766343275: Avoid denormalizing table names to prevent correctness issues - r2766321215: Return builder with table info, add time ranges at call site Changes: - MetricsRecordIdentity: tableName() -> tableId() (long) - MetricsQueryCriteria: tableName() -> tableId() (OptionalLong) - MetricsQueryCriteria.forTable(): Returns builder with catalogId/tableId - MetricsRecordConverter: tableIdentifier(TableIdentifier) -> tableId(long) + namespace(List) The caller (PersistingMetricsReporter) now needs to resolve table entity ID before creating records, similar to how catalogId is resolved. --- .../iceberg/MetricsRecordConverter.java | 73 +++++++++++-------- .../metrics/MetricsQueryCriteria.java | 47 ++++++------ .../metrics/MetricsRecordIdentity.java | 21 +++--- 3 files changed, 78 insertions(+), 63 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 71128394ae..1a118d3e46 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -19,13 +19,11 @@ package org.apache.polaris.core.metrics.iceberg; import java.time.Instant; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.metrics.CounterResult; @@ -46,7 +44,8 @@ *

    {@code
      * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport)
      *     .catalogId(catalog.getId())
    - *     .tableIdentifier(TableIdentifier.of(namespace, tableName))
    + *     .tableId(tableEntity.getId())
    + *     .namespace(namespace)
      *     .build();
      * }
    */ @@ -76,21 +75,12 @@ public static CommitReportBuilder forCommitReport(CommitReport commitReport) { return new CommitReportBuilder(commitReport); } - /** - * Converts a TableIdentifier namespace to a list of levels. - * - * @param tableIdentifier the Iceberg table identifier - * @return namespace as a list of levels - */ - private static List namespaceToList(TableIdentifier tableIdentifier) { - return Arrays.asList(tableIdentifier.namespace().levels()); - } - /** Builder for converting ScanReport to ScanMetricsRecord. */ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; - private TableIdentifier tableIdentifier; + private long tableId; + private List namespace = Collections.emptyList(); private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -102,16 +92,26 @@ public ScanReportBuilder catalogId(long catalogId) { } /** - * Sets the table identifier including namespace and table name. + * Sets the table entity ID. * - *

    The namespace and table name will be extracted from the TableIdentifier and stored as - * separate primitive fields in the SPI record. + *

    This is the internal Polaris entity ID for the table. * - * @param tableIdentifier the Iceberg table identifier + * @param tableId the table entity ID * @return this builder */ - public ScanReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { - this.tableIdentifier = tableIdentifier; + public ScanReportBuilder tableId(long tableId) { + this.tableId = tableId; + return this; + } + + /** + * Sets the namespace as a list of levels. + * + * @param namespace the namespace levels + * @return this builder + */ + public ScanReportBuilder namespace(List namespace) { + this.namespace = namespace != null ? namespace : Collections.emptyList(); return this; } @@ -123,8 +123,8 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToList(tableIdentifier)) - .tableName(tableIdentifier.name()) + .namespace(namespace) + .tableId(tableId) .timestamp(Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) @@ -165,7 +165,8 @@ public ScanMetricsRecord build() { public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; - private TableIdentifier tableIdentifier; + private long tableId; + private List namespace = Collections.emptyList(); private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -177,16 +178,26 @@ public CommitReportBuilder catalogId(long catalogId) { } /** - * Sets the table identifier including namespace and table name. + * Sets the table entity ID. + * + *

    This is the internal Polaris entity ID for the table. * - *

    The namespace and table name will be extracted from the TableIdentifier and stored as - * separate primitive fields in the SPI record. + * @param tableId the table entity ID + * @return this builder + */ + public CommitReportBuilder tableId(long tableId) { + this.tableId = tableId; + return this; + } + + /** + * Sets the namespace as a list of levels. * - * @param tableIdentifier the Iceberg table identifier + * @param namespace the namespace levels * @return this builder */ - public CommitReportBuilder tableIdentifier(TableIdentifier tableIdentifier) { - this.tableIdentifier = tableIdentifier; + public CommitReportBuilder namespace(List namespace) { + this.namespace = namespace != null ? namespace : Collections.emptyList(); return this; } @@ -198,8 +209,8 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespaceToList(tableIdentifier)) - .tableName(tableIdentifier.name()) + .namespace(namespace) + .tableId(tableId) .timestamp(Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index 736e0624c1..bf41995bfd 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -42,7 +42,7 @@ * * * - * + * * *
    PatternFields UsedIndex Required
    By Table + TimecatalogId, namespace, tableName, startTime, endTimeYes (OSS)
    By Table + TimecatalogId, tableId, startTime, endTimeYes (OSS)
    By Time OnlystartTime, endTimePartial (timestamp index)
    * @@ -88,8 +88,13 @@ public interface MetricsQueryCriteria { */ List namespace(); - /** Table name to filter by. */ - Optional tableName(); + /** + * Table entity ID to filter by. + * + *

    This is the internal table entity ID. Callers should resolve table names to IDs before + * querying, as table names can change over time. + */ + OptionalLong tableId(); // === Time Range === @@ -126,30 +131,26 @@ static ImmutableMetricsQueryCriteria.Builder builder() { } /** - * Creates criteria for querying by table and time range. + * Creates a builder pre-populated with table identification info. + * + *

    This allows the caller to add time ranges and other filters at the call site. This pattern + * is useful when table info is resolved in one place and time ranges are added elsewhere. + * + *

    Example usage: * - *

    Pagination is handled separately via the {@code PageToken} parameter to query methods. + *

    {@code
    +   * MetricsQueryCriteria criteria = MetricsQueryCriteria.forTable(catalogId, tableId)
    +   *     .startTime(startTime)
    +   *     .endTime(endTime)
    +   *     .build();
    +   * }
    * * @param catalogId the catalog entity ID - * @param namespace the namespace as a list of levels - * @param tableName the table name - * @param startTime the start time (inclusive) - * @param endTime the end time (exclusive) - * @return the query criteria + * @param tableId the table entity ID + * @return a builder pre-populated with table info, ready for adding time ranges */ - static MetricsQueryCriteria forTable( - long catalogId, - List namespace, - String tableName, - Instant startTime, - Instant endTime) { - return builder() - .catalogId(catalogId) - .namespace(namespace) - .tableName(tableName) - .startTime(startTime) - .endTime(endTime) - .build(); + static ImmutableMetricsQueryCriteria.Builder forTable(long catalogId, long tableId) { + return builder().catalogId(catalogId).tableId(tableId); } /** diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index afbcd1bf4d..7d31302e54 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -33,13 +33,14 @@ * *

    Design Decisions

    * - *

    Namespace/TableName as primitives: We use separate String fields for namespace and - * table name rather than Iceberg's {@code TableIdentifier} to avoid Iceberg dependencies in the - * Polaris SPI. The service layer can convert to/from {@code TableIdentifier} as needed. + *

    Entity IDs only (no names): We store only catalog ID and table ID, not their names. + * Names can change over time (via rename operations), which would make querying historical metrics + * by name challenging and lead to correctness issues. Queries should resolve names to IDs using the + * current catalog state. * - *

    Catalog ID only (no name): We store only the catalog ID, not the catalog name. Catalog - * names can change over time (via rename operations), which would make querying historical metrics - * by name challenging. Queries should resolve catalog names to IDs using the current catalog state. + *

    Namespace as List<String>: Namespaces are stored as a list of levels rather than + * a dot-separated string to avoid ambiguity when namespace segments contain dots. The persistence + * implementation handles the serialization format. * *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This @@ -74,11 +75,13 @@ public interface MetricsRecordIdentity { List namespace(); /** - * Table name. + * Internal table entity ID. * - *

    This is the table name portion of the table identifier, without the namespace prefix. + *

    This matches the table entity ID in Polaris persistence, as defined by {@code + * PolarisEntityCore#getId()}. The table name is not stored since it can change over time; queries + * should resolve names to IDs using the current catalog state. */ - String tableName(); + long tableId(); /** * Timestamp when the report was received. From 7d4212c29b545c4dadc142cf9a6a9a1278947f6b Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Wed, 4 Feb 2026 18:04:35 -0800 Subject: [PATCH 09/12] refactor: Remove namespace from MetricsQueryCriteria Per reviewer feedback - since we query by tableId, namespace is implicit. If users want to query by namespace, the service layer should resolve namespace to table IDs using the current catalog state, then query by those IDs. This avoids confusion with table moves over time. Namespace is still stored in MetricsRecordIdentity for display purposes. --- .../persistence/metrics/MetricsQueryCriteria.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index bf41995bfd..a6bf10b952 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -19,7 +19,6 @@ package org.apache.polaris.core.persistence.metrics; import java.time.Instant; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalLong; @@ -80,19 +79,16 @@ public interface MetricsQueryCriteria { */ OptionalLong catalogId(); - /** - * Namespace to filter by. - * - *

    The namespace is represented as a list of levels to avoid ambiguity when segments contain - * dots. An empty list means no namespace filter is applied. - */ - List namespace(); - /** * Table entity ID to filter by. * *

    This is the internal table entity ID. Callers should resolve table names to IDs before * querying, as table names can change over time. + * + *

    Note: Namespace is intentionally not included as a query filter. Since we query by table ID, + * the namespace is implicit. If users want to query by namespace, the service layer should + * resolve namespace to table IDs using the current catalog state, then query by those IDs. This + * avoids confusion with table moves over time. */ OptionalLong tableId(); From 53a00cd34c166d86c366152009f5ef35026f6ed9 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 14:17:10 -0800 Subject: [PATCH 10/12] feat: mark Metrics Persistence SPI as @Beta (experimental) Added @Beta annotation from Guava to all public types in the Metrics Persistence SPI package to signal that this API is experimental and may change in future releases. Annotated types: - MetricsPersistence - ScanMetricsRecord - CommitMetricsRecord - MetricsQueryCriteria - MetricsRecordIdentity - ReportIdToken --- .../core/persistence/metrics/CommitMetricsRecord.java | 5 +++++ .../polaris/core/persistence/metrics/MetricsPersistence.java | 5 +++++ .../core/persistence/metrics/MetricsQueryCriteria.java | 5 +++++ .../core/persistence/metrics/MetricsRecordIdentity.java | 5 +++++ .../polaris/core/persistence/metrics/ReportIdToken.java | 5 +++++ .../polaris/core/persistence/metrics/ScanMetricsRecord.java | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java index 2986beb0fb..6d67408cba 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/CommitMetricsRecord.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -31,7 +32,11 @@ * *

    Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained * from the CDI-injected {@code RealmContext} at persistence time. + * + *

    Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable public interface CommitMetricsRecord extends MetricsRecordIdentity { diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java index ae7e6f7ec0..1e9865701e 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsPersistence.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import jakarta.annotation.Nonnull; import org.apache.polaris.core.persistence.pagination.Page; import org.apache.polaris.core.persistence.pagination.PageToken; @@ -72,10 +73,14 @@ *

    The {@link ReportIdToken} provides a reference cursor implementation based on report ID * (UUID), but backends may use other cursor strategies internally. * + *

    Note: This SPI is currently experimental and not yet implemented in all persistence + * backends. The API may change in future releases. + * * @see PageToken * @see Page * @see ReportIdToken */ +@Beta public interface MetricsPersistence { /** A no-op implementation for backends that don't support metrics persistence. */ diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java index a6bf10b952..210fa39096 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsQueryCriteria.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.time.Instant; import java.util.Map; import java.util.Optional; @@ -49,6 +50,9 @@ * the {@link #metadata()} filter map. Client-provided correlation data should be stored in the * metrics record's metadata map and can be filtered using the metadata criteria. * + *

    Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. + * *

    Pagination

    * *

    Pagination is handled via the {@link org.apache.polaris.core.persistence.pagination.PageToken} @@ -66,6 +70,7 @@ * @see org.apache.polaris.core.persistence.pagination.Page * @see ReportIdToken */ +@Beta @PolarisImmutable public interface MetricsQueryCriteria { diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 7d31302e54..077df90b97 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.time.Instant; import java.util.List; import java.util.Map; @@ -45,7 +46,11 @@ *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This * keeps catalog-specific code from needing to manage realm concerns. + * + *

    Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta public interface MetricsRecordIdentity { /** diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java index c4e4ec6320..f3e3846953 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ReportIdToken.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.common.annotations.Beta; import jakarta.annotation.Nullable; import org.apache.polaris.core.persistence.pagination.Token; import org.apache.polaris.immutables.PolarisImmutable; @@ -54,7 +55,11 @@ *

  • NoSQL: Use report ID as partition/sort key cursor *
  • Time-series: Combine with timestamp for efficient range scans * + * + *

    Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable @JsonSerialize(as = ImmutableReportIdToken.class) @JsonDeserialize(as = ImmutableReportIdToken.class) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java index b9fd79ec29..44947d8f75 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/ScanMetricsRecord.java @@ -18,6 +18,7 @@ */ package org.apache.polaris.core.persistence.metrics; +import com.google.common.annotations.Beta; import java.util.List; import java.util.Optional; import org.apache.polaris.immutables.PolarisImmutable; @@ -32,7 +33,11 @@ * *

    Note: Realm ID is not included in this record. Multi-tenancy realm context should be obtained * from the CDI-injected {@code RealmContext} at persistence time. + * + *

    Note: This type is part of the experimental Metrics Persistence SPI and may change in + * future releases. */ +@Beta @PolarisImmutable public interface ScanMetricsRecord extends MetricsRecordIdentity { From 50faca329105cfd3066d38bc16207fb17b48daab Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Thu, 5 Feb 2026 15:57:17 -0800 Subject: [PATCH 11/12] feat: Add timestamp() method to MetricsRecordConverter builders Allow callers to specify the timestamp for metrics records, defaulting to Instant.now() if not provided. This enables the reporter to use the received timestamp rather than the conversion time. --- .../iceberg/MetricsRecordConverter.java | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 1a118d3e46..039011c2d0 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -81,6 +81,7 @@ public static final class ScanReportBuilder { private long catalogId; private long tableId; private List namespace = Collections.emptyList(); + private Instant timestamp; private ScanReportBuilder(ScanReport scanReport) { this.scanReport = scanReport; @@ -115,6 +116,20 @@ public ScanReportBuilder namespace(List namespace) { return this; } + /** + * Sets the timestamp for the metrics record. + * + *

    This should be the time the metrics report was received by the server, which may differ + * from the time it was recorded by the client. + * + * @param timestamp the timestamp + * @return this builder + */ + public ScanReportBuilder timestamp(Instant timestamp) { + this.timestamp = timestamp; + return this; + } + public ScanMetricsRecord build() { ScanMetricsResult metrics = scanReport.scanMetrics(); Map reportMetadata = @@ -125,7 +140,7 @@ public ScanMetricsRecord build() { .catalogId(catalogId) .namespace(namespace) .tableId(tableId) - .timestamp(Instant.now()) + .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) .schemaId(Optional.of(scanReport.schemaId())) .filterExpression( @@ -167,6 +182,7 @@ public static final class CommitReportBuilder { private long catalogId; private long tableId; private List namespace = Collections.emptyList(); + private Instant timestamp; private CommitReportBuilder(CommitReport commitReport) { this.commitReport = commitReport; @@ -201,6 +217,20 @@ public CommitReportBuilder namespace(List namespace) { return this; } + /** + * Sets the timestamp for the metrics record. + * + *

    This should be the time the metrics report was received by the server, which may differ + * from the time it was recorded by the client. + * + * @param timestamp the timestamp + * @return this builder + */ + public CommitReportBuilder timestamp(Instant timestamp) { + this.timestamp = timestamp; + return this; + } + public CommitMetricsRecord build() { CommitMetricsResult metrics = commitReport.commitMetrics(); Map reportMetadata = @@ -211,7 +241,7 @@ public CommitMetricsRecord build() { .catalogId(catalogId) .namespace(namespace) .tableId(tableId) - .timestamp(Instant.now()) + .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(commitReport.snapshotId()) .sequenceNumber(Optional.of(commitReport.sequenceNumber())) .operation(commitReport.operation()) From afaa356264fc0731dfdcee45547d9fa22d409fe6 Mon Sep 17 00:00:00 2001 From: Anand Kumar Sankaran Date: Fri, 6 Feb 2026 10:37:36 -0800 Subject: [PATCH 12/12] refactor: Remove namespace from MetricsRecordIdentity and MetricsRecordConverter Address PR review comment singhpk234#r2772722282 - remove namespace entirely since tableId uniquely identifies the table. Namespace can be derived from the table entity if needed. --- .../iceberg/MetricsRecordConverter.java | 28 ------------------- .../metrics/MetricsRecordIdentity.java | 28 ++++++------------- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java index 039011c2d0..0289ce276a 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/metrics/iceberg/MetricsRecordConverter.java @@ -20,7 +20,6 @@ import java.time.Instant; import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -45,7 +44,6 @@ * ScanMetricsRecord record = MetricsRecordConverter.forScanReport(scanReport) * .catalogId(catalog.getId()) * .tableId(tableEntity.getId()) - * .namespace(namespace) * .build(); * } */ @@ -80,7 +78,6 @@ public static final class ScanReportBuilder { private final ScanReport scanReport; private long catalogId; private long tableId; - private List namespace = Collections.emptyList(); private Instant timestamp; private ScanReportBuilder(ScanReport scanReport) { @@ -105,17 +102,6 @@ public ScanReportBuilder tableId(long tableId) { return this; } - /** - * Sets the namespace as a list of levels. - * - * @param namespace the namespace levels - * @return this builder - */ - public ScanReportBuilder namespace(List namespace) { - this.namespace = namespace != null ? namespace : Collections.emptyList(); - return this; - } - /** * Sets the timestamp for the metrics record. * @@ -138,7 +124,6 @@ public ScanMetricsRecord build() { return ScanMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespace) .tableId(tableId) .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(Optional.of(scanReport.snapshotId())) @@ -181,7 +166,6 @@ public static final class CommitReportBuilder { private final CommitReport commitReport; private long catalogId; private long tableId; - private List namespace = Collections.emptyList(); private Instant timestamp; private CommitReportBuilder(CommitReport commitReport) { @@ -206,17 +190,6 @@ public CommitReportBuilder tableId(long tableId) { return this; } - /** - * Sets the namespace as a list of levels. - * - * @param namespace the namespace levels - * @return this builder - */ - public CommitReportBuilder namespace(List namespace) { - this.namespace = namespace != null ? namespace : Collections.emptyList(); - return this; - } - /** * Sets the timestamp for the metrics record. * @@ -239,7 +212,6 @@ public CommitMetricsRecord build() { return CommitMetricsRecord.builder() .reportId(UUID.randomUUID().toString()) .catalogId(catalogId) - .namespace(namespace) .tableId(tableId) .timestamp(timestamp != null ? timestamp : Instant.now()) .snapshotId(commitReport.snapshotId()) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java index 077df90b97..51f819ea06 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/persistence/metrics/MetricsRecordIdentity.java @@ -20,28 +20,24 @@ import com.google.common.annotations.Beta; import java.time.Instant; -import java.util.List; import java.util.Map; /** * Base interface containing common identification fields shared by all metrics records. * *

    This interface defines the common fields that identify the source of a metrics report, - * including the report ID, catalog ID, table location, timestamp, and metadata. + * including the report ID, catalog ID, table ID, timestamp, and metadata. * *

    Both {@link ScanMetricsRecord} and {@link CommitMetricsRecord} extend this interface to * inherit these common fields while adding their own specific metrics. * *

    Design Decisions

    * - *

    Entity IDs only (no names): We store only catalog ID and table ID, not their names. - * Names can change over time (via rename operations), which would make querying historical metrics - * by name challenging and lead to correctness issues. Queries should resolve names to IDs using the - * current catalog state. - * - *

    Namespace as List<String>: Namespaces are stored as a list of levels rather than - * a dot-separated string to avoid ambiguity when namespace segments contain dots. The persistence - * implementation handles the serialization format. + *

    Entity IDs only (no names): We store only catalog ID and table ID, not their names or + * namespace paths. Names can change over time (via rename operations), which would make querying + * historical metrics by name challenging and lead to correctness issues. Queries should resolve + * names to IDs using the current catalog state. The table ID uniquely identifies the table, and the + * namespace can be derived from the table entity if needed. * *

    Realm ID: Realm ID is intentionally not included in this interface. Multi-tenancy realm * context should be obtained from the CDI-injected {@code RealmContext} at persistence time. This @@ -70,21 +66,13 @@ public interface MetricsRecordIdentity { */ long catalogId(); - /** - * Namespace path as a list of levels (e.g., ["db", "schema"]). - * - *

    This is the namespace portion of the table identifier. Using a list avoids ambiguity when - * namespace segments contain dots. The persistence implementation handles the serialization - * format. - */ - List namespace(); - /** * Internal table entity ID. * *

    This matches the table entity ID in Polaris persistence, as defined by {@code * PolarisEntityCore#getId()}. The table name is not stored since it can change over time; queries - * should resolve names to IDs using the current catalog state. + * should resolve names to IDs using the current catalog state. The namespace can be derived from + * the table entity if needed. */ long tableId();