From 42b036f96a649ca3dc4b5b0512c2be9205ad1a45 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sun, 10 Nov 2024 11:11:13 -0800 Subject: [PATCH 01/13] HDDS-11667. Validating DatanodeID on any request to the datanode Change-Id: I3a4604cedd9dc1857fa139dbcb53dcaa184ad9b1 --- .../hadoop/ozone/HddsDatanodeService.java | 18 ++- .../container/common/interfaces/Handler.java | 4 +- .../states/endpoint/VersionEndpointTask.java | 6 +- .../server/ratis/ContainerStateMachine.java | 11 +- .../server/ratis/XceiverServerRatis.java | 7 +- .../container/keyvalue/KeyValueHandler.java | 35 ++++ .../hadoop/ozone/TestHddsDatanodeService.java | 2 +- .../ozone/TestHddsSecureDatanodeInit.java | 2 +- .../hadoop/ozone/MiniOzoneChaosCluster.java | 6 +- .../apache/hadoop/ozone/MiniOzoneCluster.java | 13 +- .../hadoop/ozone/MiniOzoneClusterImpl.java | 15 +- .../hadoop/ozone/MiniOzoneHAClusterImpl.java | 8 +- .../TestContainerStateMachineFailures.java | 73 ++++++++- .../client/rpc/TestECKeyOutputStream.java | 152 +++++++++++++++--- .../ozone/shell/TestOzoneDatanodeShell.java | 2 +- 15 files changed, 298 insertions(+), 56 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java index 55aeb466e7f1..65d9fa0d3607 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java @@ -101,6 +101,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { private static final Logger LOG = LoggerFactory.getLogger( HddsDatanodeService.class); + private final boolean terminateJVMOnDatanodeTerminate; private OzoneConfiguration conf; private SecurityConfig secConf; @@ -124,7 +125,9 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { private 
ReconfigurationHandler reconfigurationHandler; //Constructor for DataNode PluginService - public HddsDatanodeService() { } + public HddsDatanodeService() { + this.terminateJVMOnDatanodeTerminate = true; + } /** * Create a Datanode instance based on the supplied command-line arguments. @@ -135,8 +138,8 @@ public HddsDatanodeService() { } * @param args command line arguments. */ @VisibleForTesting - public HddsDatanodeService(String[] args) { - this(false, args); + public HddsDatanodeService(String[] args, boolean terminateJVMOnDatanodeTerminate) { + this(terminateJVMOnDatanodeTerminate, false, args); } /** @@ -145,8 +148,9 @@ public HddsDatanodeService(String[] args) { * @param args command line arguments. * @param printBanner if true, then log a verbose startup message. */ - private HddsDatanodeService(boolean printBanner, String[] args) { + private HddsDatanodeService(boolean terminateJVMOnDatanodeTerminate, boolean printBanner, String[] args) { this.printBanner = printBanner; + this.terminateJVMOnDatanodeTerminate = terminateJVMOnDatanodeTerminate; this.args = args != null ? 
Arrays.copyOf(args, args.length) : null; } @@ -155,7 +159,7 @@ public static void main(String[] args) { OzoneNetUtils.disableJvmNetworkAddressCacheIfRequired( new OzoneConfiguration()); HddsDatanodeService hddsDatanodeService = - new HddsDatanodeService(true, args); + new HddsDatanodeService(true, true, args); hddsDatanodeService.run(args); } catch (Throwable e) { LOG.error("Exception in HddsDatanodeService.", e); @@ -524,7 +528,9 @@ public void join() { public void terminateDatanode() { stop(); - terminate(1); + if (terminateJVMOnDatanodeTerminate) { + terminate(1); + } } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index bfdff69be46f..f1e637085e93 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.OutputStream; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; @@ -93,7 +94,8 @@ public abstract StateMachine.DataChannel getStreamDataChannel( * * @return datanode Id */ - protected String getDatanodeId() { + @VisibleForTesting + public String getDatanodeId() { return datanodeId; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index 968c9b9a6e66..e224076b1c28 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -33,6 +33,7 @@ import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; import com.google.common.base.Preconditions; +import org.apache.ratis.protocol.exceptions.RaftException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,10 +59,9 @@ public VersionEndpointTask(EndpointStateMachine rpcEndPoint, * Computes a result, or throws an exception if unable to do so. * * @return computed result - * @throws Exception if unable to compute a result */ @Override - public EndpointStateMachine.EndPointStates call() throws Exception { + public EndpointStateMachine.EndPointStates call() { rpcEndPoint.lock(); try { @@ -105,7 +105,7 @@ public EndpointStateMachine.EndPointStates call() throws Exception { LOG.debug("Cannot execute GetVersion task as endpoint state machine " + "is in {} state", rpcEndPoint.getState()); } - } catch (DiskOutOfSpaceException | BindException ex) { + } catch (DiskOutOfSpaceException | BindException | RaftException ex) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } catch (IOException ex) { rpcEndPoint.logIfNeeded(ex); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 1048ec5092c7..a303bf9b1a96 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -275,10 +275,15 @@ public 
CSMMetrics getMetrics() { } @Override - public void initialize( - RaftServer server, RaftGroupId id, RaftStorage raftStorage) - throws IOException { + public void initialize(RaftServer server, RaftGroupId id, RaftStorage raftStorage) throws IOException { super.initialize(server, id, raftStorage); + RaftPeer selfId = server.getPeer(); + Collection peers = server.getDivision(id).getGroup().getPeers(); + if (peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.equals(selfId))) { + throw new StateMachineException(String.format("Current datanodeId: %s is not part of the group : %s with " + + "quorum: %s", selfId, id, peers)); + } + storage.init(raftStorage); ratisServer.notifyGroupAdd(id); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index a4c143439852..a6161539571e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -85,6 +85,7 @@ import org.apache.ratis.proto.RaftProtos.RoleInfoProto; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.apache.ratis.protocol.exceptions.RaftException; import org.apache.ratis.protocol.exceptions.StateMachineException; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.GroupInfoReply; @@ -562,7 +563,11 @@ public void start() throws IOException { for (ThreadPoolExecutor executor : chunkExecutors) { executor.prestartAllCoreThreads(); } - server.start(); + try { + server.start(); + } catch (Exception e) { + throw new RaftException(e); + } RaftServerRpc serverRpc = 
server.getServerRpc(); clientPort = getRealPort(serverRpc.getClientServerAddress(), diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index aa9c4bd953c5..047ff0702cd2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -99,6 +99,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.GET_SMALL_FILE_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_ARGUMENT; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_CONTAINER_STATE; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.IO_EXCEPTION; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.PUT_SMALL_FILE_ERROR; @@ -242,6 +243,15 @@ static ContainerCommandResponseProto dispatchRequest(KeyValueHandler handler, ContainerCommandRequestProto request, KeyValueContainer kvContainer, DispatcherContext dispatcherContext) { Type cmdType = request.getCmdType(); + // Validate the request has been made to the correct datanode with the node id matching. 
+ if (kvContainer != null) { + try { + handler.validateRequestDatanodeId(kvContainer.getContainerData().getReplicaIndex(), + request.getDatanodeUuid()); + } catch (StorageContainerException e) { + return ContainerUtils.logAndReturnError(LOG, e, request); + } + } switch (cmdType) { case CreateContainer: @@ -353,6 +363,13 @@ ContainerCommandResponseProto handleCreateContainer( " already exists", null, CONTAINER_ALREADY_EXISTS), request); } + try { + this.validateRequestDatanodeId(request.getCreateContainer().hasReplicaIndex() ? + request.getCreateContainer().getReplicaIndex() : null, request.getDatanodeUuid()); + } catch (StorageContainerException e) { + return ContainerUtils.logAndReturnError(LOG, e, request); + } + long containerID = request.getContainerID(); ContainerLayoutVersion layoutVersion = @@ -1519,4 +1536,22 @@ public static FaultInjector getInjector() { public static void setInjector(FaultInjector instance) { injector = instance; } + + /** + * Verify that the request was addressed to this datanode by comparing datanode UUIDs. This validates only for EC + * containers i.e. containerReplicaIdx should be > 0. + * + * @param containerReplicaIdx replicaIndex for the container command. + * @param requestDatanodeUUID UUID of the datanode the request was addressed to. + * @throws StorageContainerException if the request's datanode UUID does not match this datanode's UUID. 
+ */ + private boolean validateRequestDatanodeId(Integer containerReplicaIdx, String requestDatanodeUUID) + throws StorageContainerException { + if (containerReplicaIdx != null && containerReplicaIdx > 0 && !requestDatanodeUUID.equals(this.getDatanodeId())) { + throw new StorageContainerException( + String.format("Request is trying to write to node with uuid : %s but the current nodeId is: %s .", + requestDatanodeUUID, this.getDatanodeId()), INVALID_ARGUMENT); + } + return true; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java index e513412e377f..96026d343520 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java @@ -71,7 +71,7 @@ public class TestHddsDatanodeService { private final String clusterId = UUID.randomUUID().toString(); private final OzoneConfiguration conf = new OzoneConfiguration(); private final HddsDatanodeService service = - new HddsDatanodeService(new String[] {}); + new HddsDatanodeService(new String[] {}, true); private static final int SCM_SERVER_COUNT = 1; @BeforeEach diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index 253551115dd3..23dcdda61c92 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -112,7 +112,7 @@ public static void setUp() throws Exception { conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s securityConfig = new SecurityConfig(conf); - service = new 
HddsDatanodeService(args) { + service = new HddsDatanodeService(args, true) { @Override SCMSecurityProtocolClientSideTranslatorPB createScmSecurityClient() throws IOException { diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java index 3c12bab4323b..0f45eee47f08 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java @@ -105,9 +105,9 @@ public static FailureService of(String serviceName) { public MiniOzoneChaosCluster(OzoneConfiguration conf, OMHAService omService, SCMHAService scmService, List hddsDatanodes, String clusterPath, - Set> clazzes) { + Set> clazzes, boolean terminateJVMOnDNTerminate) { super(conf, new SCMConfigurator(), omService, scmService, hddsDatanodes, - clusterPath, null); + clusterPath, null, terminateJVMOnDNTerminate); this.numDatanodes = getHddsDatanodes().size(); this.numOzoneManagers = omService.getServices().size(); this.numStorageContainerManagers = scmService.getServices().size(); @@ -304,7 +304,7 @@ public MiniOzoneChaosCluster build() throws IOException { MiniOzoneChaosCluster cluster = new MiniOzoneChaosCluster(conf, omService, scmService, hddsDatanodes, - path, clazzes); + path, clazzes, terminateJVMOnDatanodeExit); if (startDataNodes) { cluster.startHddsDatanodes(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index ff55ee83c176..abdf662ee589 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -307,13 +307,14 @@ abstract class Builder { protected int dnInitialVersion = DatanodeVersion.FUTURE_VERSION.toProtoValue(); protected int dnCurrentVersion = DatanodeVersion.COMBINED_PUTBLOCK_WRITECHUNK_RPC.toProtoValue(); - + protected boolean terminateJVMOnDatanodeExit = true; protected int numOfDatanodes = 3; protected boolean startDataNodes = true; protected CertificateClient certClient; protected SecretKeyClient secretKeyClient; protected DatanodeFactory dnFactory = UniformDatanodesFactory.newBuilder().build(); + protected Builder(OzoneConfiguration conf) { this.conf = conf; setClusterId(); @@ -340,6 +341,16 @@ private void setClusterId() { MiniOzoneClusterImpl.class.getSimpleName() + "-" + clusterId); } + /** + * For tests where datanode failure is to be tested. + * @param terminateJVMOnDatanodeExit if false, prevents the datanode from terminating the JVM on exit. + * @return this Builder instance. + */ + public Builder setTerminateJVMOnDatanodeExit(boolean terminateJVMOnDatanodeExit) { + this.terminateJVMOnDatanodeExit = terminateJVMOnDatanodeExit; + return this; + } + /** * For tests that do not use any features of SCM, we can get by with * 0 datanodes. Also need to skip safemode in this case. 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 3594996856af..0ea1c47a1fc9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -139,6 +139,7 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster { private final List hddsDatanodes; private ReconServer reconServer; private Gateway s3g; + private final boolean terminateJVMOnDNTerminate; // Timeout for the cluster to be ready private int waitForClusterToBeReadyTimeout = 120000; // 2 min @@ -150,13 +151,14 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster { /** * Creates a new MiniOzoneCluster with Recon. */ + @SuppressWarnings("checkstyle:parameternumber") private MiniOzoneClusterImpl(OzoneConfiguration conf, SCMConfigurator scmConfigurator, OzoneManager ozoneManager, StorageContainerManager scm, List hddsDatanodes, ReconServer reconServer, - Gateway s3g) { + Gateway s3g, boolean terminateJVMOnDNTerminate) { this.conf = conf; this.ozoneManager = ozoneManager; this.scm = scm; @@ -164,6 +166,7 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, this.reconServer = reconServer; this.scmConfigurator = scmConfigurator; this.s3g = s3g; + this.terminateJVMOnDNTerminate = terminateJVMOnDNTerminate; } /** @@ -173,11 +176,12 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, * OzoneManagers and StorageContainerManagers. 
*/ MiniOzoneClusterImpl(OzoneConfiguration conf, SCMConfigurator scmConfigurator, - List hddsDatanodes, ReconServer reconServer) { + List hddsDatanodes, ReconServer reconServer, boolean terminateJVMOnDNTerminate) { this.scmConfigurator = scmConfigurator; this.conf = conf; this.hddsDatanodes = hddsDatanodes; this.reconServer = reconServer; + this.terminateJVMOnDNTerminate = terminateJVMOnDNTerminate; } public SCMConfigurator getSCMConfigurator() { @@ -448,7 +452,7 @@ public void restartHddsDatanode(int i, boolean waitForDatanode) // wait for node to be removed from SCM healthy node list. waitForHddsDatanodeToStop(datanodeService.getDatanodeDetails()); } - HddsDatanodeService service = new HddsDatanodeService(NO_ARGS); + HddsDatanodeService service = new HddsDatanodeService(NO_ARGS, terminateJVMOnDNTerminate); service.setConfiguration(config); hddsDatanodes.add(i, service); startHddsDatanode(service); @@ -659,7 +663,7 @@ public MiniOzoneCluster build() throws IOException { MiniOzoneClusterImpl cluster = new MiniOzoneClusterImpl(conf, scmConfigurator, om, scm, - hddsDatanodes, reconServer, s3g); + hddsDatanodes, reconServer, s3g, terminateJVMOnDatanodeExit); cluster.setCAClient(certClient); cluster.setSecretKeyClient(secretKeyClient); @@ -875,7 +879,7 @@ protected List createHddsDatanodes() for (int i = 0; i < numOfDatanodes; i++) { OzoneConfiguration dnConf = dnFactory.apply(conf); - HddsDatanodeService datanode = new HddsDatanodeService(NO_ARGS); + HddsDatanodeService datanode = new HddsDatanodeService(NO_ARGS, terminateJVMOnDatanodeExit); datanode.setConfiguration(dnConf); hddsDatanodes.add(datanode); } @@ -934,6 +938,5 @@ private void configureS3G() { OzoneConfigurationHolder.setConfiguration(conf); } - } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index 39c2250b73c3..c939a147aac3 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -80,6 +80,7 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl { private static final int RATIS_RPC_TIMEOUT = 1000; // 1 second public static final int NODE_FAILURE_TIMEOUT = 2000; // 2 seconds + @SuppressWarnings("checkstyle:parameternumber") public MiniOzoneHAClusterImpl( OzoneConfiguration conf, SCMConfigurator scmConfigurator, @@ -87,8 +88,9 @@ public MiniOzoneHAClusterImpl( SCMHAService scmhaService, List hddsDatanodes, String clusterPath, - ReconServer reconServer) { - super(conf, scmConfigurator, hddsDatanodes, reconServer); + ReconServer reconServer, + boolean terminateJVMOnDNTerminate) { + super(conf, scmConfigurator, hddsDatanodes, reconServer, terminateJVMOnDNTerminate); this.omhaService = omhaService; this.scmhaService = scmhaService; this.clusterMetaPath = clusterPath; @@ -432,7 +434,7 @@ public MiniOzoneHAClusterImpl build() throws IOException { MiniOzoneHAClusterImpl cluster = new MiniOzoneHAClusterImpl(conf, scmConfigurator, omService, scmService, hddsDatanodes, path, - reconServer); + reconServer, terminateJVMOnDatanodeExit); if (startDataNodes) { cluster.startHddsDatanodes(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java index b6eaca8e80d0..40003c0c4efa 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java @@ -19,6 +19,7 @@ import java.io.File; import java.io.IOException; +import java.io.UncheckedIOException; import 
java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.time.Duration; @@ -32,6 +33,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsUtils; @@ -40,6 +42,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.DatanodeRatisServerConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.ratis.conf.RatisClientConfig; @@ -50,6 +53,8 @@ import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; @@ -67,6 +72,7 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine; import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -171,7 +177,7 @@ public static void init() throws Exception { conf.setLong(OzoneConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY, 1); 
conf.setQuietMode(false); cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(10) + MiniOzoneCluster.newBuilder(conf).setNumDatanodes(10).setTerminateJVMOnDatanodeExit(false) .build(); cluster.waitForClusterToBeReady(); cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 60000); @@ -264,6 +270,71 @@ public void testContainerStateMachineCloseOnMissingPipeline() key.close(); } + + @Test + public void testContainerStateMachineRestartWithDNChangePipeline() + throws Exception { + try (OzoneOutputStream key = objectStore.getVolume(volumeName).getBucket(bucketName) + .createKey("testDNRestart", 1024, ReplicationConfig.fromTypeAndFactor(ReplicationType.RATIS, + ReplicationFactor.THREE), new HashMap<>())) { + key.write("ratis".getBytes(UTF_8)); + key.flush(); + + KeyOutputStream groupOutputStream = (KeyOutputStream) key. + getOutputStream(); + List locationInfoList = + groupOutputStream.getLocationInfoList(); + assertEquals(1, locationInfoList.size()); + + OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0); + Pipeline pipeline = omKeyLocationInfo.getPipeline(); + List datanodes = + new ArrayList<>(TestHelper.getDatanodeServices(cluster, + pipeline)); + + DatanodeDetails dn = datanodes.get(0).getDatanodeDetails(); + int index = cluster.getHddsDatanodeIndex(dn); + + // Delete all data volumes. + cluster.getHddsDatanode(dn).getDatanodeStateMachine().getContainer().getVolumeSet().getVolumesList() + .stream().forEach(v -> { + try { + FileUtils.deleteDirectory(v.getStorageDir()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + // Delete datanode.id datanodeIdFile. 
+ File datanodeIdFile = new File(HddsServerUtil.getDatanodeIdFilePath(cluster.getHddsDatanode(dn).getConf())); + boolean deleted = datanodeIdFile.delete(); + assertTrue(deleted); + cluster.restartHddsDatanode(dn, false); + GenericTestUtils.waitFor(() -> cluster.getHddsDatanodes().get(index).getDatanodeStateMachine() + .getContext().getState() == DatanodeStateMachine.DatanodeStates.SHUTDOWN, 1000, 30000); + key.write("ratis".getBytes(UTF_8)); + // Delete raft meta and restart dn, now datanode should come up. + cluster.getHddsDatanodes().get(index) + .getDatanodeStateMachine().getContainer().getMetaVolumeSet().getVolumesList() + .stream().forEach(v -> { + try { + FileUtils.deleteDirectory(v.getStorageDir()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + cluster.restartHddsDatanode(index, true); + + GenericTestUtils.waitFor(() -> { + try { + return cluster.getStorageContainerManager().getPipelineManager().getPipeline(pipeline.getId()).isClosed(); + } catch (PipelineNotFoundException e) { + throw new UncheckedIOException(e); + } + }, 1000, 30000); + } + } + @Test public void testContainerStateMachineFailures() throws Exception { OzoneOutputStream key = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java index 5743866f2d2d..e798d7751e11 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java @@ -31,6 +31,8 @@ import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; 
import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.utils.IOUtils; @@ -50,17 +52,26 @@ import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.TestHelper; +import org.apache.hadoop.ozone.container.common.interfaces.Handler; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; +import org.mockito.Mockito; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; @@ -69,8 +80,10 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; /** * Tests key output stream. @@ -91,52 +104,56 @@ public class TestECKeyOutputStream { private static int inputSize = dataBlocks * chunkSize; private static byte[][] inputChunks = new byte[dataBlocks][chunkSize]; - /** - * Create a MiniDFSCluster for testing. 
- */ - @BeforeAll - protected static void init() throws Exception { - chunkSize = 1024 * 1024; - flushSize = 2 * chunkSize; - maxFlushSize = 2 * flushSize; - blockSize = 2 * maxFlushSize; - - OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class); + private static void initConf(OzoneConfiguration configuration) { + OzoneClientConfig clientConfig = configuration.getObject(OzoneClientConfig.class); clientConfig.setChecksumType(ContainerProtos.ChecksumType.NONE); clientConfig.setStreamBufferFlushDelay(false); - conf.setFromObject(clientConfig); + configuration.setFromObject(clientConfig); // If SCM detects dead node too quickly, then container would be moved to // closed state and all in progress writes will get exception. To avoid // that, we are just keeping higher timeout and none of the tests depending // on deadnode detection timeout currently. - conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 30, TimeUnit.SECONDS); - conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 60, TimeUnit.SECONDS); - conf.setTimeDuration("hdds.ratis.raft.server.rpc.slowness.timeout", 300, + configuration.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 30, TimeUnit.SECONDS); + configuration.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 60, TimeUnit.SECONDS); + configuration.setTimeDuration("hdds.ratis.raft.server.rpc.slowness.timeout", 300, TimeUnit.SECONDS); - conf.setTimeDuration( + configuration.set("ozone.replication.allowed-configs", "(^((STANDALONE|RATIS)/(ONE|THREE))|(EC/(3-2|6-3|10-4)-" + + "(512|1024|2048|4096|1)k)$)"); + configuration.setTimeDuration( "hdds.ratis.raft.server.notification.no-leader.timeout", 300, TimeUnit.SECONDS); - conf.setQuietMode(false); - conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, + configuration.setQuietMode(false); + configuration.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, StorageUnit.MB); - conf.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 500, + 
configuration.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 500, TimeUnit.MILLISECONDS); - conf.setTimeDuration(HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL, 1, + configuration.setTimeDuration(HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); - conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); + configuration.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); // "Enable" hsync to verify that hsync would be blocked by ECKeyOutputStream - conf.setBoolean(OzoneConfigKeys.OZONE_HBASE_ENHANCEMENTS_ALLOWED, true); - conf.setBoolean("ozone.client.hbase.enhancements.allowed", true); - conf.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true); + configuration.setBoolean(OzoneConfigKeys.OZONE_HBASE_ENHANCEMENTS_ALLOWED, true); + configuration.setBoolean("ozone.client.hbase.enhancements.allowed", true); + configuration.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true); ClientConfigForTesting.newBuilder(StorageUnit.BYTES) .setBlockSize(blockSize) .setChunkSize(chunkSize) .setStreamBufferFlushSize(flushSize) .setStreamBufferMaxSize(maxFlushSize) - .applyTo(conf); + .applyTo(configuration); + } + /** + * Create a MiniDFSCluster for testing. 
+ */ + @BeforeAll + protected static void init() throws Exception { + chunkSize = 1024 * 1024; + flushSize = 2 * chunkSize; + maxFlushSize = 2 * flushSize; + blockSize = 2 * maxFlushSize; + initConf(conf); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(10) .build(); @@ -172,6 +189,91 @@ public void testCreateKeyWithECReplicationConfig() throws Exception { } } + @Test + public void testECKeyCreatetWithDatanodeIdChange() + throws Exception { + AtomicReference mock = new AtomicReference<>(false); + AtomicReference failed = new AtomicReference<>(false); + AtomicReference miniOzoneCluster = new AtomicReference<>(); + OzoneClient client1 = null; + try (MockedStatic mockedHandler = Mockito.mockStatic(Handler.class, Mockito.CALLS_REAL_METHODS)) { + Map handlers = new HashMap<>(); + mockedHandler.when(() -> Handler.getHandlerForContainerType(any(), any(), any(), any(), any(), any(), any())) + .thenAnswer(i -> { + Handler handler = Mockito.spy((Handler) i.callRealMethod()); + handlers.put(handler.getDatanodeId(), handler); + return handler; + }); + OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + initConf(ozoneConfiguration); + miniOzoneCluster.set(MiniOzoneCluster.newBuilder(ozoneConfiguration).setNumDatanodes(10).build()); + miniOzoneCluster.get().waitForClusterToBeReady(); + client1 = miniOzoneCluster.get().newClient(); + ObjectStore store = client1.getObjectStore(); + store.createVolume(volumeName); + store.getVolume(volumeName).createBucket(bucketName); + OzoneOutputStream key = TestHelper.createKey(keyString, new ECReplicationConfig(3, 2, + ECReplicationConfig.EcCodec.RS, 1024), inputSize, store, volumeName, bucketName); + byte[] b = new byte[6 * 1024]; + ECKeyOutputStream groupOutputStream = (ECKeyOutputStream) key.getOutputStream(); + List locationInfoList = groupOutputStream.getLocationInfoList(); + while (locationInfoList.isEmpty()) { + locationInfoList = groupOutputStream.getLocationInfoList(); + Random random = new Random(); + 
random.nextBytes(b); + assertInstanceOf(ECKeyOutputStream.class, key.getOutputStream()); + key.write(b); + key.flush(); + } + + assertEquals(1, locationInfoList.size()); + + OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0); + long containerId = omKeyLocationInfo.getContainerID(); + Pipeline pipeline = omKeyLocationInfo.getPipeline(); + DatanodeDetails dnWithReplicaIndex1 = + pipeline.getReplicaIndexes().entrySet().stream().filter(e -> e.getValue() == 1).map(Map.Entry::getKey) + .findFirst().get(); + Mockito.when(handlers.get(dnWithReplicaIndex1.getUuidString()).getDatanodeId()) + .thenAnswer(i -> { + if (!failed.get()) { + // Change dnId for one write chunk request. + failed.set(true); + return dnWithReplicaIndex1.getUuidString() + "_failed"; + } else { + return dnWithReplicaIndex1.getUuidString(); + } + }); + locationInfoList = groupOutputStream.getLocationInfoList(); + while (locationInfoList.size() == 1) { + locationInfoList = groupOutputStream.getLocationInfoList(); + Random random = new Random(); + random.nextBytes(b); + assertInstanceOf(ECKeyOutputStream.class, key.getOutputStream()); + key.write(b); + key.flush(); + } + assertEquals(2, locationInfoList.size()); + assertNotEquals(locationInfoList.get(1).getPipeline().getId(), pipeline.getId()); + GenericTestUtils.waitFor(() -> { + try { + return miniOzoneCluster.get().getStorageContainerManager().getContainerManager() + .getContainer(ContainerID.valueOf(containerId)).getState().equals( + HddsProtos.LifeCycleState.CLOSED); + } catch (ContainerNotFoundException e) { + throw new RuntimeException(e); + } + }, 1000, 30000); + key.close(); + Assertions.assertTrue(failed.get()); + } finally { + IOUtils.closeQuietly(client1); + if (miniOzoneCluster.get() != null) { + miniOzoneCluster.get().shutdown(); + } + } + } + @Test public void testCreateKeyWithOutBucketDefaults() throws Exception { OzoneVolume volume = objectStore.getVolume(volumeName); diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java index c40e2e009b6b..c07fc21b2c49 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java @@ -122,7 +122,7 @@ public void testDatanodeInvalidParamCommand() { private static class TestHddsDatanodeService extends HddsDatanodeService { TestHddsDatanodeService(String[] args) { - super(args); + super(args, true); } @Override From e1117359ca4c24a938289268064eb413603762e7 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sun, 10 Nov 2024 12:29:26 -0800 Subject: [PATCH 02/13] HDDS-11667. Fix findbugs Change-Id: Ia33fbb2c8eb755d77b966275b2f03f1a72a569c6 --- .../apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java index e798d7751e11..20e65291faf2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java @@ -192,7 +192,6 @@ public void testCreateKeyWithECReplicationConfig() throws Exception { @Test public void testECKeyCreatetWithDatanodeIdChange() throws Exception { - AtomicReference mock = new AtomicReference<>(false); AtomicReference failed = new AtomicReference<>(false); AtomicReference miniOzoneCluster = new AtomicReference<>(); OzoneClient client1 = null; From fcd8c5efc057978c3bd6255d1e12a65447779a99 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran 
Date: Sun, 10 Nov 2024 13:54:06 -0800 Subject: [PATCH 03/13] HDDS-11667. Fix tests Change-Id: Ided9ba2da3bcc907fa4e25644def6ef16b3f8cc6 --- .../ozone/container/keyvalue/TestKeyValueHandler.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 2637f1922c68..655ecbb48b43 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -68,6 +68,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mockito; import static org.mockito.Mockito.doCallRealMethod; import static org.mockito.Mockito.mock; @@ -131,7 +132,13 @@ public void testHandlerCommandHandling() throws Exception { .build(); KeyValueContainer container = mock(KeyValueContainer.class); - + KeyValueContainerData containerData = mock(KeyValueContainerData.class); + Mockito.when(container.getContainerData()).thenReturn(containerData); + Mockito.when(containerData.getReplicaIndex()).thenReturn(1); + ContainerProtos.ContainerCommandResponseProto responseProto = KeyValueHandler.dispatchRequest(handler, + createContainerRequest, container, null); + assertEquals(ContainerProtos.Result.INVALID_ARGUMENT, responseProto.getResult()); + Mockito.when(handler.getDatanodeId()).thenReturn(DATANODE_UUID); KeyValueHandler .dispatchRequest(handler, createContainerRequest, container, null); verify(handler, times(0)).handleListBlock( From 1d85c17c829069033777ac9124ffbee2de48dd8d Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sun, 10 Nov 2024 20:00:51 -0800 Subject: [PATCH 04/13] HDDS-11667. 
Fix tests Change-Id: Id81b19200015d5b7d5c578a0314d570bb63755de --- .../common/transport/server/ratis/ContainerStateMachine.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index a303bf9b1a96..57fd0c45f043 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -279,7 +279,9 @@ public void initialize(RaftServer server, RaftGroupId id, RaftStorage raftStorag super.initialize(server, id, raftStorage); RaftPeer selfId = server.getPeer(); Collection peers = server.getDivision(id).getGroup().getPeers(); - if (peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.equals(selfId))) { + // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied + // transaction. + if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.equals(selfId))) { throw new StateMachineException(String.format("Current datanodeId: %s is not part of the group : %s with " + "quorum: %s", selfId, id, peers)); } From fae02347a8d552f8babca1e2a30b3e577baebd3a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 11 Nov 2024 00:55:47 -0800 Subject: [PATCH 05/13] HDDS-11667. 
Fix tests Change-Id: Iaaa88f6a53cce8448c6095d994cafbdb26d6eebc --- .../common/transport/server/ratis/ContainerStateMachine.java | 4 ++-- .../common/transport/server/ratis/XceiverServerRatis.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 57fd0c45f043..3e240617cf71 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -277,11 +277,11 @@ public CSMMetrics getMetrics() { @Override public void initialize(RaftServer server, RaftGroupId id, RaftStorage raftStorage) throws IOException { super.initialize(server, id, raftStorage); - RaftPeer selfId = server.getPeer(); + RaftPeerId selfId = server.getId(); Collection peers = server.getDivision(id).getGroup().getPeers(); // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied // transaction. 
- if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.equals(selfId))) { + if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.getId().equals(selfId))) { throw new StateMachineException(String.format("Current datanodeId: %s is not part of the group : %s with " + "quorum: %s", selfId, id, peers)); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index a6161539571e..afeacd467b1c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -28,6 +28,7 @@ import java.util.Objects; import java.util.UUID; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; @@ -565,7 +566,7 @@ public void start() throws IOException { } try { server.start(); - } catch (Exception e) { + } catch (CompletionException e) { throw new RaftException(e); } From 989d5645d7748064765df1be538a688417564f4f Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 12 Nov 2024 19:19:33 -0800 Subject: [PATCH 06/13] HDDS-11667. 
Add validation to on apply transaction Change-Id: I833ebfa62089da43eaa72266da72962f94aba92b --- .../hadoop/hdds/protocol/DatanodeDetails.java | 10 +++++ .../hadoop/hdds/scm/pipeline/Pipeline.java | 3 +- .../hadoop/ozone/HddsDatanodeService.java | 18 +++----- .../states/endpoint/VersionEndpointTask.java | 6 +-- .../server/ratis/ContainerStateMachine.java | 42 ++++++++++++++----- .../server/ratis/XceiverServerRatis.java | 8 +--- .../hadoop/ozone/TestHddsDatanodeService.java | 2 +- .../ozone/TestHddsSecureDatanodeInit.java | 2 +- .../hadoop/ozone/MiniOzoneChaosCluster.java | 6 +-- .../apache/hadoop/ozone/MiniOzoneCluster.java | 13 +----- .../hadoop/ozone/MiniOzoneClusterImpl.java | 15 +++---- .../hadoop/ozone/MiniOzoneHAClusterImpl.java | 8 ++-- .../TestBlockOutputStreamWithFailures.java | 2 +- .../TestContainerStateMachineFailures.java | 25 +++-------- .../ozone/shell/TestOzoneDatanodeShell.java | 2 +- 15 files changed, 76 insertions(+), 86 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index 12efcc9aa202..9ce872328673 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.UUID; @@ -579,6 +580,15 @@ public boolean equals(Object obj) { uuid.equals(((DatanodeDetails) obj).uuid); } + + public boolean validateNodeValue(DatanodeDetails datanodeDetails) { + if (this == datanodeDetails || super.equals(datanodeDetails)) { + return true; + } + return Objects.equals(ipAddress, datanodeDetails.ipAddress) + && Objects.equals(hostName, datanodeDetails.hostName) && Objects.equals(ports, datanodeDetails.ports); + } + @Override public int 
hashCode() { return uuid.hashCode(); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 6c5b4aff57f6..1e0f2d035470 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -330,7 +330,8 @@ public List getNodesInOrder() { } void reportDatanode(DatanodeDetails dn) throws IOException { - if (nodeStatus.get(dn) == null) { + if (dn == null || (nodeStatus.get(dn) == null + && nodeStatus.keySet().stream().noneMatch(node -> node.validateNodeValue(dn)))) { throw new IOException( String.format("Datanode=%s not part of pipeline=%s", dn, id)); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java index 65d9fa0d3607..55aeb466e7f1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java @@ -101,7 +101,6 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { private static final Logger LOG = LoggerFactory.getLogger( HddsDatanodeService.class); - private final boolean terminateJVMOnDatanodeTerminate; private OzoneConfiguration conf; private SecurityConfig secConf; @@ -125,9 +124,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { private ReconfigurationHandler reconfigurationHandler; //Constructor for DataNode PluginService - public HddsDatanodeService() { - this.terminateJVMOnDatanodeTerminate = true; - } + public HddsDatanodeService() { } /** * Create a Datanode instance based on the supplied command-line arguments. 
@@ -138,8 +135,8 @@ public HddsDatanodeService() { * @param args command line arguments. */ @VisibleForTesting - public HddsDatanodeService(String[] args, boolean terminateJVMOnDatanodeTerminate) { - this(terminateJVMOnDatanodeTerminate, false, args); + public HddsDatanodeService(String[] args) { + this(false, args); } /** @@ -148,9 +145,8 @@ public HddsDatanodeService(String[] args, boolean terminateJVMOnDatanodeTerminat * @param args command line arguments. * @param printBanner if true, then log a verbose startup message. */ - private HddsDatanodeService(boolean terminateJVMOnDatanodeTerminate, boolean printBanner, String[] args) { + private HddsDatanodeService(boolean printBanner, String[] args) { this.printBanner = printBanner; - this.terminateJVMOnDatanodeTerminate = terminateJVMOnDatanodeTerminate; this.args = args != null ? Arrays.copyOf(args, args.length) : null; } @@ -159,7 +155,7 @@ public static void main(String[] args) { OzoneNetUtils.disableJvmNetworkAddressCacheIfRequired( new OzoneConfiguration()); HddsDatanodeService hddsDatanodeService = - new HddsDatanodeService(true, true, args); + new HddsDatanodeService(true, args); hddsDatanodeService.run(args); } catch (Throwable e) { LOG.error("Exception in HddsDatanodeService.", e); @@ -528,9 +524,7 @@ public void join() { public void terminateDatanode() { stop(); - if (terminateJVMOnDatanodeTerminate) { - terminate(1); - } + terminate(1); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index e224076b1c28..968c9b9a6e66 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -33,7 +33,6 @@ import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; import com.google.common.base.Preconditions; -import org.apache.ratis.protocol.exceptions.RaftException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,9 +58,10 @@ public VersionEndpointTask(EndpointStateMachine rpcEndPoint, * Computes a result, or throws an exception if unable to do so. * * @return computed result + * @throws Exception if unable to compute a result */ @Override - public EndpointStateMachine.EndPointStates call() { + public EndpointStateMachine.EndPointStates call() throws Exception { rpcEndPoint.lock(); try { @@ -105,7 +105,7 @@ public EndpointStateMachine.EndPointStates call() { LOG.debug("Cannot execute GetVersion task as endpoint state machine " + "is in {} state", rpcEndPoint.getState()); } - } catch (DiskOutOfSpaceException | BindException | RaftException ex) { + } catch (DiskOutOfSpaceException | BindException ex) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } catch (IOException ex) { rpcEndPoint.logIfNeeded(ex); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 3e240617cf71..f141cd8bf74e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -65,6 +65,7 @@ import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.common.utils.BufferUtils; +import 
org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel; @@ -202,6 +203,7 @@ long getStartTime() { private final boolean waitOnBothFollowers; private final HddsDatanodeService datanodeService; private static Semaphore semaphore = new Semaphore(1); + private final AtomicBoolean peersValidated; /** * CSM metrics. @@ -252,6 +254,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI HDDS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TXNS_DEFAULT); applyTransactionSemaphore = new Semaphore(maxPendingApplyTransactions); stateMachineHealthy = new AtomicBoolean(true); + this.peersValidated = new AtomicBoolean(false); ThreadFactory threadFactory = new ThreadFactoryBuilder() .setNameFormat( @@ -265,6 +268,27 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI } + private void validatePeers(RaftServer server, RaftGroupId id) throws IOException { + if (this.peersValidated.get()) { + return; + } + synchronized (peersValidated) { + if (!peersValidated.get()) { + RaftPeerId selfId = server.getId(); + Collection peers = server.getDivision(id).getGroup().getPeers(); + // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied + // transaction. Then the peer list can be only validated on apply transaction. 
+ if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null + && raftPeer.getId().equals(selfId))) { + throw new StorageContainerException(String.format("Current datanodeId: %s is not part of the " + + "group : %s with quorum: %s", selfId, id, peers), ContainerProtos.Result.INVALID_CONFIG); + } else if (!peers.isEmpty()) { + peersValidated.set(true); + } + } + } + } + @Override public StateMachineStorage getStateMachineStorage() { return storage; @@ -275,17 +299,10 @@ public CSMMetrics getMetrics() { } @Override - public void initialize(RaftServer server, RaftGroupId id, RaftStorage raftStorage) throws IOException { + public void initialize( + RaftServer server, RaftGroupId id, RaftStorage raftStorage) + throws IOException { super.initialize(server, id, raftStorage); - RaftPeerId selfId = server.getId(); - Collection peers = server.getDivision(id).getGroup().getPeers(); - // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied - // transaction. 
- if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null && raftPeer.getId().equals(selfId))) { - throw new StateMachineException(String.format("Current datanodeId: %s is not part of the group : %s with " + - "quorum: %s", selfId, id, peers)); - } - storage.init(raftStorage); ratisServer.notifyGroupAdd(id); @@ -969,6 +986,11 @@ private CompletableFuture applyTransaction( final CheckedSupplier task = () -> { try { + try { + this.validatePeers(this.ratisServer.getServer(), getGroupId()); + } catch (StorageContainerException e) { + return ContainerUtils.logAndReturnError(LOG, e, request); + } long timeNow = Time.monotonicNowNanos(); long queueingDelay = timeNow - context.getStartTime(); metrics.recordQueueingDelay(request.getCmdType(), queueingDelay); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index afeacd467b1c..a4c143439852 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -28,7 +28,6 @@ import java.util.Objects; import java.util.UUID; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; @@ -86,7 +85,6 @@ import org.apache.ratis.proto.RaftProtos.RoleInfoProto; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.NotLeaderException; -import org.apache.ratis.protocol.exceptions.RaftException; import org.apache.ratis.protocol.exceptions.StateMachineException; import 
org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.GroupInfoReply; @@ -564,11 +562,7 @@ public void start() throws IOException { for (ThreadPoolExecutor executor : chunkExecutors) { executor.prestartAllCoreThreads(); } - try { - server.start(); - } catch (CompletionException e) { - throw new RaftException(e); - } + server.start(); RaftServerRpc serverRpc = server.getServerRpc(); clientPort = getRealPort(serverRpc.getClientServerAddress(), diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java index 96026d343520..e513412e377f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java @@ -71,7 +71,7 @@ public class TestHddsDatanodeService { private final String clusterId = UUID.randomUUID().toString(); private final OzoneConfiguration conf = new OzoneConfiguration(); private final HddsDatanodeService service = - new HddsDatanodeService(new String[] {}, true); + new HddsDatanodeService(new String[] {}); private static final int SCM_SERVER_COUNT = 1; @BeforeEach diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index 23dcdda61c92..253551115dd3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -112,7 +112,7 @@ public static void setUp() throws Exception { conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s securityConfig = new SecurityConfig(conf); - service = new HddsDatanodeService(args, true) { + service = new 
HddsDatanodeService(args) { @Override SCMSecurityProtocolClientSideTranslatorPB createScmSecurityClient() throws IOException { diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java index 0f45eee47f08..3c12bab4323b 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java @@ -105,9 +105,9 @@ public static FailureService of(String serviceName) { public MiniOzoneChaosCluster(OzoneConfiguration conf, OMHAService omService, SCMHAService scmService, List hddsDatanodes, String clusterPath, - Set> clazzes, boolean terminateJVMOnDNTerminate) { + Set> clazzes) { super(conf, new SCMConfigurator(), omService, scmService, hddsDatanodes, - clusterPath, null, terminateJVMOnDNTerminate); + clusterPath, null); this.numDatanodes = getHddsDatanodes().size(); this.numOzoneManagers = omService.getServices().size(); this.numStorageContainerManagers = scmService.getServices().size(); @@ -304,7 +304,7 @@ public MiniOzoneChaosCluster build() throws IOException { MiniOzoneChaosCluster cluster = new MiniOzoneChaosCluster(conf, omService, scmService, hddsDatanodes, - path, clazzes, terminateJVMOnDatanodeExit); + path, clazzes); if (startDataNodes) { cluster.startHddsDatanodes(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index abdf662ee589..ff55ee83c176 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -307,14 
+307,13 @@ abstract class Builder { protected int dnInitialVersion = DatanodeVersion.FUTURE_VERSION.toProtoValue(); protected int dnCurrentVersion = DatanodeVersion.COMBINED_PUTBLOCK_WRITECHUNK_RPC.toProtoValue(); - protected boolean terminateJVMOnDatanodeExit = true; + protected int numOfDatanodes = 3; protected boolean startDataNodes = true; protected CertificateClient certClient; protected SecretKeyClient secretKeyClient; protected DatanodeFactory dnFactory = UniformDatanodesFactory.newBuilder().build(); - protected Builder(OzoneConfiguration conf) { this.conf = conf; setClusterId(); @@ -341,16 +340,6 @@ private void setClusterId() { MiniOzoneClusterImpl.class.getSimpleName() + "-" + clusterId); } - /** - * For tests where datanode failure is to be tested. - * @param terminateJVMOnDatanodeExit if false will prevent datanode from terminating. - * @return - */ - public Builder setTerminateJVMOnDatanodeExit(boolean terminateJVMOnDatanodeExit) { - this.terminateJVMOnDatanodeExit = terminateJVMOnDatanodeExit; - return this; - } - /** * For tests that do not use any features of SCM, we can get by with * 0 datanodes. Also need to skip safemode in this case. 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 0ea1c47a1fc9..3594996856af 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -139,7 +139,6 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster { private final List hddsDatanodes; private ReconServer reconServer; private Gateway s3g; - private final boolean terminateJVMOnDNTerminate; // Timeout for the cluster to be ready private int waitForClusterToBeReadyTimeout = 120000; // 2 min @@ -151,14 +150,13 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster { /** * Creates a new MiniOzoneCluster with Recon. */ - @SuppressWarnings("checkstyle:parameternumber") private MiniOzoneClusterImpl(OzoneConfiguration conf, SCMConfigurator scmConfigurator, OzoneManager ozoneManager, StorageContainerManager scm, List hddsDatanodes, ReconServer reconServer, - Gateway s3g, boolean terminateJVMOnDNTerminate) { + Gateway s3g) { this.conf = conf; this.ozoneManager = ozoneManager; this.scm = scm; @@ -166,7 +164,6 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, this.reconServer = reconServer; this.scmConfigurator = scmConfigurator; this.s3g = s3g; - this.terminateJVMOnDNTerminate = terminateJVMOnDNTerminate; } /** @@ -176,12 +173,11 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, * OzoneManagers and StorageContainerManagers. 
*/ MiniOzoneClusterImpl(OzoneConfiguration conf, SCMConfigurator scmConfigurator, - List hddsDatanodes, ReconServer reconServer, boolean terminateJVMOnDNTerminate) { + List hddsDatanodes, ReconServer reconServer) { this.scmConfigurator = scmConfigurator; this.conf = conf; this.hddsDatanodes = hddsDatanodes; this.reconServer = reconServer; - this.terminateJVMOnDNTerminate = terminateJVMOnDNTerminate; } public SCMConfigurator getSCMConfigurator() { @@ -452,7 +448,7 @@ public void restartHddsDatanode(int i, boolean waitForDatanode) // wait for node to be removed from SCM healthy node list. waitForHddsDatanodeToStop(datanodeService.getDatanodeDetails()); } - HddsDatanodeService service = new HddsDatanodeService(NO_ARGS, terminateJVMOnDNTerminate); + HddsDatanodeService service = new HddsDatanodeService(NO_ARGS); service.setConfiguration(config); hddsDatanodes.add(i, service); startHddsDatanode(service); @@ -663,7 +659,7 @@ public MiniOzoneCluster build() throws IOException { MiniOzoneClusterImpl cluster = new MiniOzoneClusterImpl(conf, scmConfigurator, om, scm, - hddsDatanodes, reconServer, s3g, terminateJVMOnDatanodeExit); + hddsDatanodes, reconServer, s3g); cluster.setCAClient(certClient); cluster.setSecretKeyClient(secretKeyClient); @@ -879,7 +875,7 @@ protected List createHddsDatanodes() for (int i = 0; i < numOfDatanodes; i++) { OzoneConfiguration dnConf = dnFactory.apply(conf); - HddsDatanodeService datanode = new HddsDatanodeService(NO_ARGS, terminateJVMOnDatanodeExit); + HddsDatanodeService datanode = new HddsDatanodeService(NO_ARGS); datanode.setConfiguration(dnConf); hddsDatanodes.add(datanode); } @@ -938,5 +934,6 @@ private void configureS3G() { OzoneConfigurationHolder.setConfiguration(conf); } + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index c939a147aac3..39c2250b73c3 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -80,7 +80,6 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl { private static final int RATIS_RPC_TIMEOUT = 1000; // 1 second public static final int NODE_FAILURE_TIMEOUT = 2000; // 2 seconds - @SuppressWarnings("checkstyle:parameternumber") public MiniOzoneHAClusterImpl( OzoneConfiguration conf, SCMConfigurator scmConfigurator, @@ -88,9 +87,8 @@ public MiniOzoneHAClusterImpl( SCMHAService scmhaService, List hddsDatanodes, String clusterPath, - ReconServer reconServer, - boolean terminateJVMOnDNTerminate) { - super(conf, scmConfigurator, hddsDatanodes, reconServer, terminateJVMOnDNTerminate); + ReconServer reconServer) { + super(conf, scmConfigurator, hddsDatanodes, reconServer); this.omhaService = omhaService; this.scmhaService = scmhaService; this.clusterMetaPath = clusterPath; @@ -434,7 +432,7 @@ public MiniOzoneHAClusterImpl build() throws IOException { MiniOzoneHAClusterImpl cluster = new MiniOzoneHAClusterImpl(conf, scmConfigurator, omService, scmService, hddsDatanodes, path, - reconServer, terminateJVMOnDatanodeExit); + reconServer); if (startDataNodes) { cluster.startHddsDatanodes(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index 5e5461634c0e..68fd5e254d8b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -751,7 +751,7 @@ void testDatanodeFailureWithPreAllocation(boolean flushDelay, boolean enablePigg assertEquals(0, 
blockOutputStream.getBufferPool().computeBufferData()); assertEquals(0, blockOutputStream.getCommitIndex2flushedDataMap().size()); assertEquals(0, keyOutputStream.getLocationInfoList().size()); - + PIPELINE = pipeline; cluster.restartHddsDatanode(pipeline.getNodes().get(0), true); // Written the same data twice diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java index 40003c0c4efa..dc922be157fc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java @@ -53,7 +53,6 @@ import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.ozone.HddsDatanodeService; @@ -72,7 +71,6 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine; import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -177,7 +175,7 @@ public static void init() throws Exception { conf.setLong(OzoneConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY, 1); 
conf.setQuietMode(false); cluster = - MiniOzoneCluster.newBuilder(conf).setNumDatanodes(10).setTerminateJVMOnDatanodeExit(false) + MiniOzoneCluster.newBuilder(conf).setNumDatanodes(10) .build(); cluster.waitForClusterToBeReady(); cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 60000); @@ -310,25 +308,12 @@ public void testContainerStateMachineRestartWithDNChangePipeline() boolean deleted = datanodeIdFile.delete(); assertTrue(deleted); cluster.restartHddsDatanode(dn, false); - GenericTestUtils.waitFor(() -> cluster.getHddsDatanodes().get(index).getDatanodeStateMachine() - .getContext().getState() == DatanodeStateMachine.DatanodeStates.SHUTDOWN, 1000, 30000); - key.write("ratis".getBytes(UTF_8)); - // Delete raft meta and restart dn, now datanode should come up. - cluster.getHddsDatanodes().get(index) - .getDatanodeStateMachine().getContainer().getMetaVolumeSet().getVolumesList() - .stream().forEach(v -> { - try { - FileUtils.deleteDirectory(v.getStorageDir()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - cluster.restartHddsDatanode(index, true); - GenericTestUtils.waitFor(() -> { try { - return cluster.getStorageContainerManager().getPipelineManager().getPipeline(pipeline.getId()).isClosed(); - } catch (PipelineNotFoundException e) { + key.write("ratis".getBytes(UTF_8)); + key.flush(); + return groupOutputStream.getLocationInfoList().size() > 1; + } catch (IOException e) { throw new UncheckedIOException(e); } }, 1000, 30000); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java index c07fc21b2c49..c40e2e009b6b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDatanodeShell.java @@ -122,7 +122,7 @@ public 
void testDatanodeInvalidParamCommand() { private static class TestHddsDatanodeService extends HddsDatanodeService { TestHddsDatanodeService(String[] args) { - super(args, true); + super(args); } @Override From 844b4075a60f6835d008eddc873925afa644edeb Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 12 Nov 2024 21:30:22 -0800 Subject: [PATCH 07/13] HDDS-11667. Fix test Change-Id: If5b9c305b8367054e301192dc1a2aeae9b22d33f --- .../ozone/client/rpc/TestBlockOutputStreamWithFailures.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java index 68fd5e254d8b..5e5461634c0e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockOutputStreamWithFailures.java @@ -751,7 +751,7 @@ void testDatanodeFailureWithPreAllocation(boolean flushDelay, boolean enablePigg assertEquals(0, blockOutputStream.getBufferPool().computeBufferData()); assertEquals(0, blockOutputStream.getCommitIndex2flushedDataMap().size()); assertEquals(0, keyOutputStream.getLocationInfoList().size()); - PIPELINE = pipeline; + cluster.restartHddsDatanode(pipeline.getNodes().get(0), true); // Written the same data twice From aa760ce9fd3b5823e535bed1e88d5c391bab7ad7 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Wed, 13 Nov 2024 00:05:38 -0800 Subject: [PATCH 08/13] HDDS-11667. 
Fix findbugs Change-Id: I2a0eb6e418c15159715c0f133ce515af0f6cd0bf --- .../server/ratis/ContainerStateMachine.java | 24 ++++++++----------- .../TestContainerStateMachineFailures.java | 1 - 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index f141cd8bf74e..886030d95e34 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -272,20 +272,16 @@ private void validatePeers(RaftServer server, RaftGroupId id) throws IOException if (this.peersValidated.get()) { return; } - synchronized (peersValidated) { - if (!peersValidated.get()) { - RaftPeerId selfId = server.getId(); - Collection peers = server.getDivision(id).getGroup().getPeers(); - // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied - // transaction. Then the peer list can be only validated on apply transaction. - if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null - && raftPeer.getId().equals(selfId))) { - throw new StorageContainerException(String.format("Current datanodeId: %s is not part of the " + - "group : %s with quorum: %s", selfId, id, peers), ContainerProtos.Result.INVALID_CONFIG); - } else if (!peers.isEmpty()) { - peersValidated.set(true); - } - } + RaftPeerId selfId = server.getId(); + Collection peers = server.getDivision(id).getGroup().getPeers(); + // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied + // transaction. 
Then the peer list can be only validated on apply transaction. + if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null + && raftPeer.getId().equals(selfId))) { + throw new StorageContainerException(String.format("Current datanodeId: %s is not part of the " + + "group : %s with quorum: %s", selfId, id, peers), ContainerProtos.Result.INVALID_CONFIG); + } else if (!peers.isEmpty()) { + peersValidated.set(true); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java index dc922be157fc..e3759521c829 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java @@ -291,7 +291,6 @@ public void testContainerStateMachineRestartWithDNChangePipeline() pipeline)); DatanodeDetails dn = datanodes.get(0).getDatanodeDetails(); - int index = cluster.getHddsDatanodeIndex(dn); // Delete all data volumes. cluster.getHddsDatanode(dn).getDatanodeStateMachine().getContainer().getVolumeSet().getVolumesList() From d3094faefd629bfbcdabd4636bc5007f158fcb75 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 18 Nov 2024 20:17:07 -0800 Subject: [PATCH 09/13] HDDS-11667. 
Simplify validate peer function Change-Id: I17683608abab9e471d1597181a37f266ec7986b4 --- .../server/ratis/ContainerStateMachine.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 886030d95e34..1176c4a7989b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -79,6 +79,7 @@ import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeer; @@ -272,17 +273,13 @@ private void validatePeers(RaftServer server, RaftGroupId id) throws IOException if (this.peersValidated.get()) { return; } - RaftPeerId selfId = server.getId(); - Collection peers = server.getDivision(id).getGroup().getPeers(); - // If peers list is empty then it means Ratis hasn't created any raft--meta file containing the last applied - // transaction. Then the peer list can be only validated on apply transaction. 
- if (!peers.isEmpty() && peers.stream().noneMatch(raftPeer -> raftPeer != null - && raftPeer.getId().equals(selfId))) { - throw new StorageContainerException(String.format("Current datanodeId: %s is not part of the " + - "group : %s with quorum: %s", selfId, id, peers), ContainerProtos.Result.INVALID_CONFIG); - } else if (!peers.isEmpty()) { - peersValidated.set(true); + final RaftGroup group = ratisServer.getServerDivision(getGroupId()).getGroup(); + final RaftPeerId selfId = ratisServer.getServer().getId(); + if (group.getPeer(selfId) == null) { + throw new StorageContainerException("Current datanode " + selfId + " is not a member of " + group, + ContainerProtos.Result.INVALID_CONFIG); } + peersValidated.set(true); } @Override From 2f9b6118f4f395b5bd3f15ebc4e22b91ab9837a0 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 19 Nov 2024 11:42:19 -0800 Subject: [PATCH 10/13] HDDS-11667. Address review comments Change-Id: I108bc94e8a14c96b07e57e6a657d7971b3c4dc27 --- .../common/transport/server/ratis/ContainerStateMachine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 1176c4a7989b..0be2b6de6eff 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -269,7 +269,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI } - private void validatePeers(RaftServer server, RaftGroupId id) throws IOException { + private void validatePeers() throws IOException { if (this.peersValidated.get()) { 
return; } @@ -980,7 +980,7 @@ private CompletableFuture applyTransaction( = () -> { try { try { - this.validatePeers(this.ratisServer.getServer(), getGroupId()); + this.validatePeers(); } catch (StorageContainerException e) { return ContainerUtils.logAndReturnError(LOG, e, request); } From 13e000dedfe91421ed0201100b9752da615fdef0 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 19 Nov 2024 17:45:21 -0800 Subject: [PATCH 11/13] HDDS-11667. Address review comments Change-Id: I5e0ce66c10794bd90f63bab9508e7753df59a2dc --- .../org/apache/hadoop/hdds/protocol/DatanodeDetails.java | 7 ++++++- .../java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index 9ce872328673..afdd952c46b7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -581,7 +581,12 @@ public boolean equals(Object obj) { } - public boolean validateNodeValue(DatanodeDetails datanodeDetails) { + /** + * Checks hostname, ipAddress & port of the 2 nodes are the same. 
+ * @param datanodeDetails + * @return + */ + public boolean compareNodeValues(DatanodeDetails datanodeDetails) { if (this == datanodeDetails || super.equals(datanodeDetails)) { return true; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java index 1e0f2d035470..8dafb6b27688 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/Pipeline.java @@ -330,8 +330,11 @@ public List getNodesInOrder() { } void reportDatanode(DatanodeDetails dn) throws IOException { + //This is a workaround for the case a datanode restarted with reinitializing it's dnId but it still reports the + // same set of pipelines it was part of. The pipeline report should be accepted for this anomalous condition. + // We rely on StaleNodeHandler in closing this pipeline eventually. if (dn == null || (nodeStatus.get(dn) == null - && nodeStatus.keySet().stream().noneMatch(node -> node.validateNodeValue(dn)))) { + && nodeStatus.keySet().stream().noneMatch(node -> node.compareNodeValues(dn)))) { throw new IOException( String.format("Datanode=%s not part of pipeline=%s", dn, id)); } From ae49c6ca58be0565f6915da62bc6871013175848 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 19 Nov 2024 18:49:16 -0800 Subject: [PATCH 12/13] HDDS-11667. 
Address review comments Change-Id: I37681251e339a898257d8fae55d9669f0cc7270e --- .../java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index afdd952c46b7..c24f9f80f592 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -583,8 +583,8 @@ public boolean equals(Object obj) { /** * Checks hostname, ipAddress & port of the 2 nodes are the same. - * @param datanodeDetails - * @return + * @param datanodeDetails dnDetails object to compare with. + * @return true if the values match otherwise false. */ public boolean compareNodeValues(DatanodeDetails datanodeDetails) { if (this == datanodeDetails || super.equals(datanodeDetails)) { From 2f8efa3629ebf093b6246b413358c5bb22a9ab03 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 19 Nov 2024 19:50:39 -0800 Subject: [PATCH 13/13] HDDS-11667. Fix javadoc warning Change-Id: I6acedc565f79bd482538ccf84641a79d4e011060 --- .../java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index c24f9f80f592..b687e2b3e85c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -582,7 +582,7 @@ public boolean equals(Object obj) { /** - * Checks hostname, ipAddress & port of the 2 nodes are the same. + * Checks hostname, ipAddress and port of the 2 nodes are the same. 
* @param datanodeDetails dnDetails object to compare with. * @return true if the values match otherwise false. */