Repository: hadoop Updated Branches: refs/heads/trunk 176bb3f81 -> ee44b069c
HDDS-866. Handle RaftRetryFailureException in OzoneClient. Contributed by Shashikant Banerjee. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ee44b069 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ee44b069 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ee44b069 Branch: refs/heads/trunk Commit: ee44b069c66703cd6c804c492647371fa0aa3501 Parents: 176bb3f Author: Shashikant Banerjee <[email protected]> Authored: Thu Nov 22 15:02:07 2018 +0530 Committer: Shashikant Banerjee <[email protected]> Committed: Thu Nov 22 15:02:07 2018 +0530 ---------------------------------------------------------------------- .../hadoop/hdds/scm/XceiverClientRatis.java | 9 +++- .../container/common/impl/HddsDispatcher.java | 56 ++++++++++---------- .../server/ratis/XceiverServerRatis.java | 14 ++--- .../container/keyvalue/KeyValueContainer.java | 3 ++ hadoop-hdds/pom.xml | 2 +- .../ozone/client/io/ChunkGroupOutputStream.java | 9 ++-- .../client/rpc/TestFailureHandlingByClient.java | 10 +--- hadoop-ozone/pom.xml | 2 +- 8 files changed, 54 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index b238a09..8a07526 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdds.scm; import org.apache.hadoop.hdds.HddsUtils; import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.protocol.RaftRetryFailureException; import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.thirdparty.com.google.protobuf .InvalidProtocolBufferException; @@ -196,10 +197,16 @@ public final class XceiverClientRatis extends XceiverClientSpi { new ArrayList<>(); CompletableFuture<ContainerCommandResponseProto> containerCommandResponse = raftClientReply.whenComplete((reply, e) -> LOG - .debug("received reply {} for request: {} exception: {}", request, + .info("received reply {} for request: {} exception: {}", request, reply, e)) .thenApply(reply -> { try { + // we need to handle RaftRetryFailure Exception + RaftRetryFailureException raftRetryFailureException = + reply.getRetryFailureException(); + if (raftRetryFailureException != null) { + throw new CompletionException(raftRetryFailureException); + } ContainerCommandResponseProto response = ContainerCommandResponseProto .parseFrom(reply.getMessage().getContent()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index b8669fb..24ba784 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -284,7 +284,12 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor { @Override public void validateContainerCommand( ContainerCommandRequestProto msg) throws StorageContainerException { - ContainerType containerType = msg.getCreateContainer().getContainerType(); + long containerID = msg.getContainerID(); + Container container = getContainer(containerID); + if (container == null) { + return; + } + ContainerType containerType = container.getContainerType(); ContainerProtos.Type cmdType = msg.getCmdType(); AuditAction action = ContainerCommandRequestPBHelper.getAuditAction(cmdType); @@ -299,35 +304,30 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor { audit(action, eventType, params, AuditEventStatus.FAILURE, ex); throw ex; } - long containerID = msg.getContainerID(); - Container container; - container = getContainer(containerID); - - if (container != null) { - State containerState = container.getContainerState(); - if (!HddsUtils.isReadOnly(msg) && containerState != State.OPEN) { - switch (cmdType) { - case CreateContainer: - // Create Container is idempotent. There is nothing to validate. - break; - case CloseContainer: - // If the container is unhealthy, closeContainer will be rejected - // while execution. Nothing to validate here. - break; - default: - // if the container is not open, no updates can happen. Just throw - // an exception - ContainerNotOpenException cex = new ContainerNotOpenException( - "Container " + containerID + " in " + containerState + " state"); - audit(action, eventType, params, AuditEventStatus.FAILURE, cex); - throw cex; - } - } else if (HddsUtils.isReadOnly(msg) && containerState == State.INVALID) { - InvalidContainerStateException iex = new InvalidContainerStateException( + + State containerState = container.getContainerState(); + if (!HddsUtils.isReadOnly(msg) && containerState != State.OPEN) { + switch (cmdType) { + case CreateContainer: + // Create Container is idempotent. There is nothing to validate. + break; + case CloseContainer: + // If the container is unhealthy, closeContainer will be rejected + // while execution. Nothing to validate here. + break; + default: + // if the container is not open, no updates can happen. Just throw + // an exception + ContainerNotOpenException cex = new ContainerNotOpenException( "Container " + containerID + " in " + containerState + " state"); - audit(action, eventType, params, AuditEventStatus.FAILURE, iex); - throw iex; + audit(action, eventType, params, AuditEventStatus.FAILURE, cex); + throw cex; } + } else if (HddsUtils.isReadOnly(msg) && containerState == State.INVALID) { + InvalidContainerStateException iex = new InvalidContainerStateException( + "Container " + containerID + " in " + containerState + " state"); + audit(action, eventType, params, AuditEventStatus.FAILURE, iex); + throw iex; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 3e54700..a4927e0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -464,17 +464,13 @@ public final class XceiverServerRatis implements XceiverServerSpi { @Override public boolean isExist(HddsProtos.PipelineID pipelineId) { - try { - for (RaftGroupId groupId : server.getGroupIds()) { - if (PipelineID.valueOf( - groupId.getUuid()).getProtobuf().equals(pipelineId)) { - return true; - } + for (RaftGroupId groupId : server.getGroupIds()) { + if (PipelineID.valueOf(groupId.getUuid()).getProtobuf() + .equals(pipelineId)) { + return true; } - return false; - } catch (IOException e) { - return false; } + return false; } @Override http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index b46fd38..e737a53 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -313,6 +313,9 @@ public class KeyValueContainer implements Container<KeyValueContainerData> { try { MetadataStore db = BlockUtils.getDB(containerData, config); db.compactDB(); + LOG.info("Container {} is closed with bcsId {}.", + containerData.getContainerID(), + containerData.getBlockCommitSequenceId()); } catch (StorageContainerException ex) { throw ex; } catch (IOException ex) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-hdds/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index d4fa64e..869ecbf 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -46,7 +46,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> <hdds.version>0.4.0-SNAPSHOT</hdds.version> <!-- Apache Ratis version --> - <ratis.version>0.3.0-6f3419a-SNAPSHOT</ratis.version> + <ratis.version>0.4.0-b600fc2-SNAPSHOT</ratis.version> <bouncycastle.version>1.60</bouncycastle.version> http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java ---------------------------------------------------------------------- diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java index d2a4443..79b0fe7 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers import org.apache.hadoop.hdds.scm.protocolPB .StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.storage.ChunkOutputStream; +import org.apache.ratis.protocol.RaftRetryFailureException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -393,7 +394,7 @@ public class ChunkGroupOutputStream extends OutputStream { private boolean checkIfContainerIsClosed(IOException ioe) { if (ioe.getCause() != null) { - return checkIfContainerNotOpenException(ioe) || Optional + return checkIfContainerNotOpenOrRaftRetryFailureException(ioe) || Optional .of(ioe.getCause()) .filter(e -> e instanceof StorageContainerException) .map(e -> (StorageContainerException) e) @@ -403,10 +404,12 @@ public class ChunkGroupOutputStream extends OutputStream { return false; } - private boolean checkIfContainerNotOpenException(IOException ioe) { + private boolean checkIfContainerNotOpenOrRaftRetryFailureException( + IOException ioe) { Throwable t = ioe.getCause(); while (t != null) { - if (t instanceof ContainerNotOpenException) { + if (t instanceof ContainerNotOpenException + || t instanceof RaftRetryFailureException) { return true; } t = t.getCause(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java ---------------------------------------------------------------------- diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java index 923300f..98976cf 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java @@ -110,10 +110,6 @@ public class TestFailureHandlingByClient { } } - // TODO: currently, shutting down 2 datanodes in Ratis leads to - // watchForCommit Api in RaftClient to hand=g forever. Once that gets - // fixed, we need to execute the tets with 2 node failures. - @Test public void testBlockWritesWithDnFailures() throws Exception { String keyName = "ratis3"; @@ -139,7 +135,7 @@ public class TestFailureHandlingByClient { .getPipeline(container.getPipelineID()); List<DatanodeDetails> datanodes = pipeline.getNodes(); cluster.shutdownHddsDatanode(datanodes.get(0)); - // cluster.shutdownHddsDatanode(datanodes.get(1)); + cluster.shutdownHddsDatanode(datanodes.get(1)); // The write will fail but exception will be handled and length will be // updated correctly in OzoneManager once the steam is closed key.close(); @@ -151,7 +147,6 @@ public class TestFailureHandlingByClient { OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs); Assert.assertEquals(data.length, keyInfo.getDataSize()); validateData(keyName, data); - cluster.restartHddsDatanode(datanodes.get(0), true); } @Test @@ -179,8 +174,8 @@ public class TestFailureHandlingByClient { .getPipeline(container.getPipelineID()); List<DatanodeDetails> datanodes = pipeline.getNodes(); cluster.shutdownHddsDatanode(datanodes.get(0)); + cluster.shutdownHddsDatanode(datanodes.get(1)); - // cluster.shutdownHddsDatanode(datanodes.get(1)); // The write will fail but exception will be handled and length will be // updated correctly in OzoneManager once the steam is closed key.write(data.getBytes()); @@ -192,7 +187,6 @@ public class TestFailureHandlingByClient { OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs); Assert.assertEquals(2 * data.getBytes().length, keyInfo.getDataSize()); validateData(keyName, data.concat(data).getBytes()); - cluster.restartHddsDatanode(datanodes.get(0), true); } private OzoneOutputStream createKey(String keyName, ReplicationType type, http://git-wip-us.apache.org/repos/asf/hadoop/blob/ee44b069/hadoop-ozone/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index f020abe..39c65d5 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -33,7 +33,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> <hadoop.version>3.2.1-SNAPSHOT</hadoop.version> <hdds.version>0.4.0-SNAPSHOT</hdds.version> <ozone.version>0.4.0-SNAPSHOT</ozone.version> - <ratis.version>0.3.0-6f3419a-SNAPSHOT</ratis.version> + <ratis.version>0.4.0-b600fc2-SNAPSHOT</ratis.version> <bouncycastle.version>1.60</bouncycastle.version> <ozone.release>Badlands</ozone.release> <declared.ozone.version>${ozone.version}</declared.ozone.version> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
