diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index 1c08ec3b26fd..65b3abcd413c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -1077,6 +1077,47 @@ default Future modifyTableAsync(TableDescriptor td) throws IOException { */ Future modifyTableAsync(TableDescriptor td, boolean reopenRegions) throws IOException; + /** + * Reopen all regions of a table. This is useful after calling + * {@link #modifyTableAsync(TableDescriptor, boolean)} with reopenRegions=false to gradually roll + * out table descriptor changes to regions. Regions are reopened in-place (no move). + * @param tableName table whose regions to reopen + * @throws IOException if a remote or network exception occurs + */ + default void reopenTableRegions(TableName tableName) throws IOException { + get(reopenTableRegionsAsync(tableName), getSyncWaitTimeout(), TimeUnit.MILLISECONDS); + } + + /** + * Reopen specific regions of a table. Useful for canary testing table descriptor changes on a + * subset of regions before rolling out to the entire table. + * @param tableName table whose regions to reopen + * @param regions specific regions to reopen + * @throws IOException if a remote or network exception occurs + */ + default void reopenTableRegions(TableName tableName, List regions) + throws IOException { + get(reopenTableRegionsAsync(tableName, regions), getSyncWaitTimeout(), TimeUnit.MILLISECONDS); + } + + /** + * Asynchronously reopen all regions of a table. + * @param tableName table whose regions to reopen + * @return Future for tracking completion + * @throws IOException if a remote or network exception occurs + */ + Future reopenTableRegionsAsync(TableName tableName) throws IOException; + + /** + * Asynchronously reopen specific regions of a table. + * @param tableName table whose regions to reopen + * @param regions specific regions to reopen + * @return Future for tracking completion + * @throws IOException if a remote or network exception occurs + */ + Future reopenTableRegionsAsync(TableName tableName, List regions) + throws IOException; + /** * Change the store file tracker of the given table. * @param tableName the table you want to change diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java index e6bf6c3d28e0..7117fd4fd33f 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java @@ -511,6 +511,17 @@ public Future modifyTableAsync(TableDescriptor td, boolean reopenRegions) return admin.modifyTable(td, reopenRegions); } + @Override + public Future reopenTableRegionsAsync(TableName tableName) throws IOException { + return admin.reopenTableRegions(tableName).toCompletableFuture(); + } + + @Override + public Future reopenTableRegionsAsync(TableName tableName, List regions) + throws IOException { + return admin.reopenTableRegions(tableName, regions).toCompletableFuture(); + } + @Override public Future modifyTableStoreFileTrackerAsync(TableName tableName, String dstSFT) throws IOException { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java index ec0556f20ac1..56211cedc493 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java @@ -213,6 +213,24 @@ default CompletableFuture modifyTable(TableDescriptor desc) { */ CompletableFuture modifyTable(TableDescriptor desc, boolean reopenRegions); + /** + * Reopen all regions of a table. This is useful after calling + * {@link #modifyTable(TableDescriptor, boolean)} with reopenRegions=false to gradually roll out + * table descriptor changes to regions. Regions are reopened in-place (no move). + * @param tableName table whose regions to reopen + * @return CompletableFuture that completes when all regions have been reopened + */ + CompletableFuture reopenTableRegions(TableName tableName); + + /** + * Reopen specific regions of a table. Useful for canary testing table descriptor changes on a + * subset of regions before rolling out to the entire table. + * @param tableName table whose regions to reopen + * @param regions specific regions to reopen + * @return CompletableFuture that completes when specified regions have been reopened + */ + CompletableFuture reopenTableRegions(TableName tableName, List regions); + /** * Change the store file tracker of the given table. * @param tableName the table you want to change diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java index b1fb2be13547..8132b184809c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java @@ -161,6 +161,16 @@ public CompletableFuture modifyTable(TableDescriptor desc, boolean reopenR return wrap(rawAdmin.modifyTable(desc, reopenRegions)); } + @Override + public CompletableFuture reopenTableRegions(TableName tableName) { + return wrap(rawAdmin.reopenTableRegions(tableName)); + } + + @Override + public CompletableFuture reopenTableRegions(TableName tableName, List regions) { + return wrap(rawAdmin.reopenTableRegions(tableName, regions)); + } + @Override public CompletableFuture modifyTableStoreFileTracker(TableName tableName, String dstSFT) { return wrap(rawAdmin.modifyTableStoreFileTracker(tableName, dstSFT)); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java index 83780a4a1219..a3c177577dfd 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java @@ -263,6 +263,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RollAllWALWritersRequest; @@ -754,6 +756,21 @@ public CompletableFuture modifyTable(TableDescriptor desc, boolean reopenR new ModifyTableProcedureBiConsumer(this, desc.getTableName())); } + @Override + public CompletableFuture reopenTableRegions(TableName tableName) { + return reopenTableRegions(tableName, Collections.emptyList()); + } + + @Override + public CompletableFuture reopenTableRegions(TableName tableName, List regions) { + List regionNames = regions.stream().map(RegionInfo::getRegionName).toList(); + return this. procedureCall(tableName, + RequestConverter.buildReopenTableRegionsRequest(tableName, regionNames, ng.getNonceGroup(), + ng.newNonce()), + (s, c, req, done) -> s.reopenTableRegions(c, req, done), (resp) -> resp.getProcId(), + new ReopenTableRegionsProcedureBiConsumer(this, tableName)); + } + @Override public CompletableFuture modifyTableStoreFileTracker(TableName tableName, String dstSFT) { return this. regionNames, final long nonceGroup, final long nonce) { + ReopenTableRegionsRequest.Builder builder = ReopenTableRegionsRequest.newBuilder(); + builder.setTableName(ProtobufUtil.toProtoTableName(tableName)); + + if (regionNames != null && !regionNames.isEmpty()) { + for (byte[] regionName : regionNames) { + builder.addRegionNames(UnsafeByteOperations.unsafeWrap(regionName)); + } + } + + builder.setNonceGroup(nonceGroup); + builder.setNonce(nonce); + + return builder.build(); + } + public static ModifyTableStoreFileTrackerRequest buildModifyTableStoreFileTrackerRequest( final TableName tableName, final String dstSFT, final long nonceGroup, final long nonce) { ModifyTableStoreFileTrackerRequest.Builder builder = diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto index 6dd6ee723b02..f475d26060d0 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto @@ -211,6 +211,17 @@ message ModifyTableResponse { optional uint64 proc_id = 1; } +message ReopenTableRegionsRequest { + required TableName table_name = 1; + repeated bytes region_names = 2; // empty = all regions + optional uint64 nonce_group = 3 [default = 0]; + optional uint64 nonce = 4 [default = 0]; +} + +message ReopenTableRegionsResponse { + optional uint64 proc_id = 1; +} + message FlushTableRequest { required TableName table_name = 1; repeated bytes column_family = 2; @@ -910,6 +921,13 @@ service MasterService { rpc ModifyTable(ModifyTableRequest) returns(ModifyTableResponse); + /** + * Reopen regions of a table. Regions are reopened in-place without moving. + * Useful for rolling out table descriptor changes after modifyTable(reopenRegions=false). + */ + rpc ReopenTableRegions(ReopenTableRegionsRequest) + returns(ReopenTableRegionsResponse); + /** Creates a new table asynchronously */ rpc CreateTable(CreateTableRequest) returns(CreateTableResponse); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 22d3ab69b51c..9e7cfa4dba65 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -4251,6 +4251,54 @@ protected String getDescription() { } + /** + * Reopen regions provided in the argument. Applies throttling to the procedure to avoid + * overwhelming the system. This is used by the reopenTableRegions methods in the Admin API via + * HMaster. + * @param tableName The current table name + * @param regionNames The region names of the regions to reopen + * @param nonceGroup Identifier for the source of the request, a client or process + * @param nonce A unique identifier for this operation from the client or process identified + * by nonceGroup (the source must ensure each operation gets a + * unique id). + * @return procedure Id + * @throws IOException if reopening region fails while running procedure + */ + long reopenRegionsThrottled(final TableName tableName, final List regionNames, + final long nonceGroup, final long nonce) throws IOException { + + checkInitialized(); + + if (!tableStateManager.isTablePresent(tableName)) { + throw new TableNotFoundException(tableName); + } + + return MasterProcedureUtil + .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) { + @Override + protected void run() throws IOException { + ReopenTableRegionsProcedure proc; + if (regionNames.isEmpty()) { + proc = ReopenTableRegionsProcedure.throttled(getConfiguration(), + getTableDescriptors().get(tableName)); + } else { + proc = ReopenTableRegionsProcedure.throttled(getConfiguration(), + getTableDescriptors().get(tableName), regionNames); + } + + LOG.info("{} throttled reopening {} regions for table {}", getClientIdAuditPrefix(), + regionNames.isEmpty() ? "all" : regionNames.size(), tableName); + + submitProcedure(proc); + } + + @Override + protected String getDescription() { + return "Throttled ReopenTableRegionsProcedure for " + tableName; + } + }); + } + @Override public ReplicationPeerManager getReplicationPeerManager() { return replicationPeerManager; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index e9e0f970ef8d..e9ca086863f5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -320,6 +320,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RegionSpecifierAndState; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RollAllWALWritersRequest; @@ -1554,6 +1556,29 @@ public ModifyTableResponse modifyTable(RpcController controller, ModifyTableRequ } } + @Override + public ReopenTableRegionsResponse reopenTableRegions(RpcController controller, + ReopenTableRegionsRequest request) throws ServiceException { + try { + server.checkInitialized(); + + final TableName tableName = ProtobufUtil.toTableName(request.getTableName()); + final List regionNames = request.getRegionNamesList().stream() + .map(ByteString::toByteArray).collect(Collectors.toList()); + + LOG.info("Reopening regions for table={}, regionCount={}", tableName, + regionNames.isEmpty() ? "all" : regionNames.size()); + + long procId = server.reopenRegionsThrottled(tableName, regionNames, request.getNonceGroup(), + request.getNonce()); + + return ReopenTableRegionsResponse.newBuilder().setProcId(procId).build(); + + } catch (IOException ioe) { + throw new ServiceException(ioe); + } + } + @Override public ModifyTableStoreFileTrackerResponse modifyTableStoreFileTracker(RpcController controller, ModifyTableStoreFileTrackerRequest req) throws ServiceException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java index 6e3491a24a88..88b958112938 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java @@ -25,8 +25,10 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.conf.ConfigKey; import org.apache.hadoop.hbase.master.assignment.RegionStateNode; @@ -89,7 +91,7 @@ public class ReopenTableRegionsProcedure /** * Create a new ReopenTableRegionsProcedure respecting the throttling configuration for the table. * First check the table descriptor, then fall back to the global configuration. Only used in - * ModifyTableProcedure. + * ModifyTableProcedure and in {@link HMaster#reopenRegionsThrottled}. */ public static ReopenTableRegionsProcedure throttled(final Configuration conf, final TableDescriptor desc) { @@ -103,6 +105,24 @@ public static ReopenTableRegionsProcedure throttled(final Configuration conf, return new ReopenTableRegionsProcedure(desc.getTableName(), backoffMillis, batchSizeMax); } + /** + * Create a new ReopenTableRegionsProcedure for specific regions, respecting the throttling + * configuration for the table. First check the table descriptor, then fall back to the global + * configuration. Only used in {@link HMaster#reopenRegionsThrottled}. + */ + public static ReopenTableRegionsProcedure throttled(final Configuration conf, + final TableDescriptor desc, final List regionNames) { + long backoffMillis = Optional.ofNullable(desc.getValue(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY)) + .map(Long::parseLong).orElseGet(() -> conf.getLong(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, + PROGRESSIVE_BATCH_BACKOFF_MILLIS_DEFAULT)); + int batchSizeMax = Optional.ofNullable(desc.getValue(PROGRESSIVE_BATCH_SIZE_MAX_KEY)) + .map(Integer::parseInt).orElseGet( + () -> conf.getInt(PROGRESSIVE_BATCH_SIZE_MAX_KEY, PROGRESSIVE_BATCH_SIZE_MAX_DISABLED)); + + return new ReopenTableRegionsProcedure(desc.getTableName(), regionNames, backoffMillis, + batchSizeMax); + } + public ReopenTableRegionsProcedure() { this(null); } @@ -116,12 +136,12 @@ public ReopenTableRegionsProcedure(final TableName tableName, final List PROGRESSIVE_BATCH_SIZE_MAX_DISABLED); } - ReopenTableRegionsProcedure(final TableName tableName, long reopenBatchBackoffMillis, + public ReopenTableRegionsProcedure(final TableName tableName, long reopenBatchBackoffMillis, int reopenBatchSizeMax) { this(tableName, Collections.emptyList(), reopenBatchBackoffMillis, reopenBatchSizeMax); } - private ReopenTableRegionsProcedure(final TableName tableName, final List regionNames, + public ReopenTableRegionsProcedure(final TableName tableName, final List regionNames, long reopenBatchBackoffMillis, int reopenBatchSizeMax) { this.tableName = tableName; this.regionNames = regionNames; @@ -190,67 +210,78 @@ private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) { @Override protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { - switch (state) { - case REOPEN_TABLE_REGIONS_GET_REGIONS: - if (!isTableEnabled(env)) { - LOG.info("Table {} is disabled, give up reopening its regions", tableName); - return Flow.NO_MORE_STATE; - } - List tableRegions = - env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName); - regions = getRegionLocationsForReopen(tableRegions); - setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS); - return Flow.HAS_MORE_STATE; - case REOPEN_TABLE_REGIONS_REOPEN_REGIONS: - // if we didn't finish reopening the last batch yet, let's keep trying until we do. - // at that point, the batch will be empty and we can generate a new batch - if (!regions.isEmpty() && currentRegionBatch.isEmpty()) { - currentRegionBatch = regions.stream().limit(reopenBatchSize).collect(Collectors.toList()); - batchesProcessed++; - } - for (HRegionLocation loc : currentRegionBatch) { - RegionStateNode regionNode = - env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion()); - // this possible, maybe the region has already been merged or split, see HBASE-20921 - if (regionNode == null) { - continue; + try { + switch (state) { + case REOPEN_TABLE_REGIONS_GET_REGIONS: + if (!isTableEnabled(env)) { + LOG.info("Table {} is disabled, give up reopening its regions", tableName); + return Flow.NO_MORE_STATE; } - TransitRegionStateProcedure proc; - regionNode.lock(); - try { - if (regionNode.getProcedure() != null) { + List tableRegions = + env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName); + regions = getRegionLocationsForReopen(tableRegions); + setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS); + return Flow.HAS_MORE_STATE; + case REOPEN_TABLE_REGIONS_REOPEN_REGIONS: + // if we didn't finish reopening the last batch yet, let's keep trying until we do. + // at that point, the batch will be empty and we can generate a new batch + if (!regions.isEmpty() && currentRegionBatch.isEmpty()) { + currentRegionBatch = + regions.stream().limit(reopenBatchSize).collect(Collectors.toList()); + batchesProcessed++; + } + for (HRegionLocation loc : currentRegionBatch) { + RegionStateNode regionNode = + env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion()); + // this possible, maybe the region has already been merged or split, see HBASE-20921 + if (regionNode == null) { continue; } - proc = TransitRegionStateProcedure.reopen(env, regionNode.getRegionInfo()); - regionNode.setProcedure(proc); - } finally { - regionNode.unlock(); + TransitRegionStateProcedure proc; + regionNode.lock(); + try { + if (regionNode.getProcedure() != null) { + continue; + } + proc = TransitRegionStateProcedure.reopen(env, regionNode.getRegionInfo()); + regionNode.setProcedure(proc); + } finally { + regionNode.unlock(); + } + addChildProcedure(proc); + regionsReopened++; + } + setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED); + return Flow.HAS_MORE_STATE; + case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED: + // update region lists based on what's been reopened + regions = filterReopened(env, regions); + currentRegionBatch = filterReopened(env, currentRegionBatch); + + // existing batch didn't fully reopen, so try to resolve that first. + // since this is a retry, don't do the batch backoff + if (!currentRegionBatch.isEmpty()) { + return reopenIfSchedulable(env, currentRegionBatch, false); } - addChildProcedure(proc); - regionsReopened++; - } - setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED); - return Flow.HAS_MORE_STATE; - case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED: - // update region lists based on what's been reopened - regions = filterReopened(env, regions); - currentRegionBatch = filterReopened(env, currentRegionBatch); - - // existing batch didn't fully reopen, so try to resolve that first. - // since this is a retry, don't do the batch backoff - if (!currentRegionBatch.isEmpty()) { - return reopenIfSchedulable(env, currentRegionBatch, false); - } - if (regions.isEmpty()) { - return Flow.NO_MORE_STATE; - } + if (regions.isEmpty()) { + return Flow.NO_MORE_STATE; + } - // current batch is finished, schedule more regions - return reopenIfSchedulable(env, regions, true); - default: - throw new UnsupportedOperationException("unhandled state=" + state); + // current batch is finished, schedule more regions + return reopenIfSchedulable(env, regions, true); + default: + throw new UnsupportedOperationException("unhandled state=" + state); + } + } catch (IOException e) { + if (isRollbackSupported(state) || e instanceof DoNotRetryIOException) { + setFailure("master-reopen-table-regions", e); + } else { + LOG.warn("Retriable error trying to reopen regions for table={} (in state={})", tableName, + state, e); + } } + return Flow.HAS_MORE_STATE; } private List filterReopened(MasterProcedureEnv env, @@ -296,19 +327,33 @@ private void setBackoffState(long millis) { } private List - getRegionLocationsForReopen(List tableRegionsForReopen) { + getRegionLocationsForReopen(List tableRegionsForReopen) throws IOException { List regionsToReopen = new ArrayList<>(); if ( CollectionUtils.isNotEmpty(regionNames) && CollectionUtils.isNotEmpty(tableRegionsForReopen) ) { + List notFoundRegions = new ArrayList<>(); + for (byte[] regionName : regionNames) { + boolean found = false; for (HRegionLocation hRegionLocation : tableRegionsForReopen) { if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) { regionsToReopen.add(hRegionLocation); + found = true; break; } } + if (!found) { + notFoundRegions.add(regionName); + } + } + + if (!notFoundRegions.isEmpty()) { + String regionNamesStr = + notFoundRegions.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")); + throw new UnknownRegionException( + "The following regions do not belong to table " + tableName + ": " + regionNamesStr); } } else { regionsToReopen = tableRegionsForReopen; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureSpecificRegions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureSpecificRegions.java new file mode 100644 index 000000000000..a38ec490186d --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureSpecificRegions.java @@ -0,0 +1,442 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.SingleProcessHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.UnknownRegionException; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestReopenTableRegionsProcedureSpecificRegions { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureSpecificRegions.class); + + private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); + private static final byte[] CF = Bytes.toBytes("cf"); + + private static SingleProcessHBaseCluster singleProcessHBaseCluster; + + @BeforeClass + public static void setupCluster() throws Exception { + Configuration conf = UTIL.getConfiguration(); + conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); + singleProcessHBaseCluster = UTIL.startMiniCluster(1); + } + + @AfterClass + public static void tearDown() throws Exception { + UTIL.shutdownMiniCluster(); + if (Objects.nonNull(singleProcessHBaseCluster)) { + singleProcessHBaseCluster.close(); + } + } + + private ProcedureExecutor getProcExec() { + return UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); + } + + @Test + public void testInvalidRegionNamesThrowsException() throws Exception { + TableName tableName = TableName.valueOf("TestInvalidRegions"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List regions = UTIL.getAdmin().getRegions(tableName); + assertFalse("Table should have at least one region", regions.isEmpty()); + + List invalidRegionNames = + Collections.singletonList(Bytes.toBytes("non-existent-region-name")); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, invalidRegionNames, 0L, Integer.MAX_VALUE); + + long procId = getProcExec().submitProcedure(proc); + UTIL.waitFor(60000, proc::isFailed); + + Throwable cause = ProcedureTestingUtility.getExceptionCause(proc); + assertTrue("Expected UnknownRegionException, got: " + cause.getClass().getName(), + cause instanceof UnknownRegionException); + assertTrue("Error message should contain region name", + cause.getMessage().contains("non-existent-region-name")); + assertTrue("Error message should contain table name", + cause.getMessage().contains(tableName.getNameAsString())); + } + } + + @Test + public void testMixedValidInvalidRegions() throws Exception { + TableName tableName = TableName.valueOf("TestMixedRegions"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List actualRegions = UTIL.getAdmin().getRegions(tableName); + assertFalse("Table should have at least one region", actualRegions.isEmpty()); + + List mixedRegionNames = new ArrayList<>(); + mixedRegionNames.add(actualRegions.get(0).getRegionName()); + mixedRegionNames.add(Bytes.toBytes("invalid-region-1")); + mixedRegionNames.add(Bytes.toBytes("invalid-region-2")); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, mixedRegionNames, 0L, Integer.MAX_VALUE); + + long procId = getProcExec().submitProcedure(proc); + UTIL.waitFor(60000, proc::isFailed); + + Throwable cause = ProcedureTestingUtility.getExceptionCause(proc); + assertTrue("Expected UnknownRegionException", cause instanceof UnknownRegionException); + assertTrue("Error message should contain first invalid region", + cause.getMessage().contains("invalid-region-1")); + assertTrue("Error message should contain second invalid region", + cause.getMessage().contains("invalid-region-2")); + } + } + + @Test + public void testSpecificRegionsReopenWithThrottling() throws Exception { + TableName tableName = TableName.valueOf("TestSpecificThrottled"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "100") + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, "2").build(); + + UTIL.getAdmin().createTable(td, Bytes.toBytes("a"), Bytes.toBytes("z"), 5); + + List allRegions = UTIL.getAdmin().getRegions(tableName); + assertEquals(5, allRegions.size()); + + List specificRegionNames = + allRegions.subList(0, 3).stream().map(RegionInfo::getRegionName).collect(Collectors.toList()); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure.throttled( + UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName), specificRegionNames); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should succeed", proc.isFailed()); + assertEquals("Should reopen exactly 3 regions", 3, proc.getRegionsReopened()); + assertTrue("Should process multiple batches with batch size 2", + proc.getBatchesProcessed() >= 2); + } + + @Test + public void testEmptyRegionListReopensAll() throws Exception { + TableName tableName = TableName.valueOf("TestEmptyList"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build(); + + UTIL.getAdmin().createTable(td, Bytes.toBytes("a"), Bytes.toBytes("z"), 5); + + List allRegions = UTIL.getAdmin().getRegions(tableName); + assertEquals(5, allRegions.size()); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName)); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should succeed", proc.isFailed()); + assertEquals("Should reopen all 5 regions", 5, proc.getRegionsReopened()); + } + + @Test + public void testDisabledTableSkipsReopen() throws Exception { + TableName tableName = TableName.valueOf("TestDisabledTable"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + UTIL.getAdmin().disableTable(tableName); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName)); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should succeed", proc.isFailed()); + assertEquals("Should not reopen any regions for disabled table", 0, + proc.getRegionsReopened()); + } + } + + @Test + public void testReopenRegionsThrottledWithLargeTable() throws Exception { + TableName tableName = TableName.valueOf("TestLargeTable"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "50") + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, "3").build(); + + UTIL.getAdmin().createTable(td, Bytes.toBytes("a"), Bytes.toBytes("z"), 10); + + List regions = UTIL.getAdmin().getRegions(tableName); + assertEquals(10, regions.size()); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName)); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should succeed", proc.isFailed()); + assertEquals("Should reopen all 10 regions", 10, proc.getRegionsReopened()); + assertTrue("Should process multiple batches", proc.getBatchesProcessed() >= 4); + } + + @Test + public void testConfigurationPrecedence() throws Exception { + TableName tableName = TableName.valueOf("TestConfigPrecedence"); + + Configuration conf = UTIL.getConfiguration(); + conf.setLong(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, 1000); + conf.setInt(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, 5); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "2000") + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, "2").build(); + + UTIL.getAdmin().createTable(td); + + ReopenTableRegionsProcedure proc = + ReopenTableRegionsProcedure.throttled(conf, UTIL.getAdmin().getDescriptor(tableName)); + + assertEquals("Table descriptor config should override global config", 2000, + proc.getReopenBatchBackoffMillis()); + } + + @Test + public void testThrottledVsUnthrottled() throws Exception { + TableName tableName = TableName.valueOf("TestThrottledVsUnthrottled"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "1000") + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, "2").build(); + + UTIL.getAdmin().createTable(td, Bytes.toBytes("a"), Bytes.toBytes("z"), 5); + + List regions = UTIL.getAdmin().getRegions(tableName); + List regionNames = + regions.stream().map(RegionInfo::getRegionName).collect(Collectors.toList()); + + ReopenTableRegionsProcedure unthrottledProc = + new ReopenTableRegionsProcedure(tableName, regionNames); + assertEquals("Unthrottled should use default (0ms)", 0, + unthrottledProc.getReopenBatchBackoffMillis()); + + ReopenTableRegionsProcedure throttledProc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName), regionNames); + assertEquals("Throttled should use table config (1000ms)", 1000, + throttledProc.getReopenBatchBackoffMillis()); + } + + @Test + public void testExceptionInProcedureExecution() throws Exception { + TableName tableName = TableName.valueOf("TestExceptionInExecution"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List invalidRegionNames = + Collections.singletonList(Bytes.toBytes("nonexistent-region")); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, invalidRegionNames, 0L, Integer.MAX_VALUE); + + long procId = getProcExec().submitProcedure(proc); + UTIL.waitFor(60000, () -> getProcExec().isFinished(procId)); + + Procedure result = getProcExec().getResult(procId); + assertTrue("Procedure should have failed", result.isFailed()); + + Throwable cause = ProcedureTestingUtility.getExceptionCause(result); + assertTrue("Should be UnknownRegionException", cause instanceof UnknownRegionException); + } + } + + @Test + public void testSerializationWithRegionNames() throws Exception { + TableName tableName = TableName.valueOf("TestSerialization"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List regions = UTIL.getAdmin().getRegions(tableName); + List regionNames = + regions.stream().map(RegionInfo::getRegionName).collect(Collectors.toList()); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, regionNames, 500L, 3); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertEquals("TableName should be preserved", tableName, proc.getTableName()); + assertEquals("Backoff should be preserved", 500L, proc.getReopenBatchBackoffMillis()); + } + } + + @Test + public void testAllRegionsWithValidNames() throws Exception { + TableName tableName = TableName.valueOf("TestAllValidRegions"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List actualRegions = UTIL.getAdmin().getRegions(tableName); + assertFalse("Table should have regions", actualRegions.isEmpty()); + + List validRegionNames = + actualRegions.stream().map(RegionInfo::getRegionName).collect(Collectors.toList()); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, validRegionNames, 0L, Integer.MAX_VALUE); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should succeed with all valid regions", proc.isFailed()); + assertEquals("Should reopen all specified regions", actualRegions.size(), + proc.getRegionsReopened()); + } + } + + @Test + public void testSingleInvalidRegion() throws Exception { + TableName tableName = TableName.valueOf("TestSingleInvalid"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List invalidRegionNames = + Collections.singletonList(Bytes.toBytes("totally-fake-region")); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, invalidRegionNames, 0L, Integer.MAX_VALUE); + + long procId = getProcExec().submitProcedure(proc); + UTIL.waitFor(60000, proc::isFailed); + + Throwable cause = ProcedureTestingUtility.getExceptionCause(proc); + assertTrue("Expected UnknownRegionException", cause instanceof UnknownRegionException); + assertTrue("Error message should list the invalid region", + cause.getMessage().contains("totally-fake-region")); + } + } + + @Test + public void testRecoveryAfterValidationFailure() throws Exception { + TableName tableName = TableName.valueOf("TestRecoveryValidation"); + try (Table ignored = UTIL.createTable(tableName, CF)) { + + List invalidRegionNames = + Collections.singletonList(Bytes.toBytes("invalid-for-recovery")); + + ReopenTableRegionsProcedure proc = + new ReopenTableRegionsProcedure(tableName, invalidRegionNames, 0L, Integer.MAX_VALUE); + + ProcedureExecutor procExec = getProcExec(); + long procId = procExec.submitProcedure(proc); + + UTIL.waitFor(60000, () -> procExec.isFinished(procId)); + + Procedure result = procExec.getResult(procId); + assertTrue("Procedure should fail validation", result.isFailed()); + + Throwable cause = ProcedureTestingUtility.getExceptionCause(result); + assertTrue("Should be UnknownRegionException", cause instanceof UnknownRegionException); + assertTrue("Error should mention the invalid region", + cause.getMessage().contains("invalid-for-recovery")); + } + } + + @Test + public void testEmptyTableWithNoRegions() throws Exception { + TableName tableName = TableName.valueOf("TestEmptyTable"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build(); + + UTIL.getAdmin().createTable(td); + + List regions = UTIL.getAdmin().getRegions(tableName); + int regionCount = regions.size(); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName)); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should complete successfully even with no regions", proc.isFailed()); + assertEquals("Should handle empty table gracefully", regionCount, proc.getRegionsReopened()); + } + + @Test + public void testConfigChangeDoesNotAffectRunningProcedure() throws Exception { + TableName tableName = TableName.valueOf("TestConfigChange"); + + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "1000") + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY, "2").build(); + + UTIL.getAdmin().createTable(td, Bytes.toBytes("a"), Bytes.toBytes("z"), 5); + + ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure + .throttled(UTIL.getConfiguration(), UTIL.getAdmin().getDescriptor(tableName)); + + assertEquals("Initial config should be 1000ms", 1000L, proc.getReopenBatchBackoffMillis()); + + TableDescriptor modifiedTd = TableDescriptorBuilder.newBuilder(td) + .setValue(ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "5000").build(); + UTIL.getAdmin().modifyTable(modifiedTd); + + assertEquals("Running procedure should keep original config", 1000L, + proc.getReopenBatchBackoffMillis()); + + long procId = getProcExec().submitProcedure(proc); + ProcedureTestingUtility.waitProcedure(getProcExec(), procId); + + assertFalse("Procedure should complete successfully", proc.isFailed()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java index a59b2966b89d..ad5cd5eda797 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java @@ -431,6 +431,17 @@ public Future modifyTableAsync(TableDescriptor td, boolean reopenRegions) return admin.modifyTableAsync(td, reopenRegions); } + @Override + public Future reopenTableRegionsAsync(TableName tableName) throws IOException { + return admin.reopenTableRegionsAsync(tableName); + } + + @Override + public Future reopenTableRegionsAsync(TableName tableName, List regions) + throws IOException { + return admin.reopenTableRegionsAsync(tableName, regions); + } + public void shutdown() throws IOException { admin.shutdown(); } diff --git a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java index 3d5a7e502e0a..76a8b41481be 100644 --- a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java +++ b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java @@ -750,6 +750,16 @@ public Future modifyTableAsync(TableDescriptor td, boolean reopenRegions) throw new NotImplementedException("modifyTableAsync not supported in ThriftAdmin"); } + @Override + public Future reopenTableRegionsAsync(TableName tableName) { + throw new NotImplementedException("reopenTableRegionsAsync not supported in ThriftAdmin"); + } + + @Override + public Future reopenTableRegionsAsync(TableName tableName, List regions) { + throw new NotImplementedException("reopenTableRegionsAsync not supported in ThriftAdmin"); + } + @Override public void shutdown() { throw new NotImplementedException("shutdown not supported in ThriftAdmin");