Repository: hadoop Updated Branches: refs/heads/ozone-0.2 d8668ef0b -> c04f36db9
HDDS-370. Add and implement following functions in SCMClientProtocolServer. Contributed by Ajay Kumar. (cherry picked from commit 3928af983eb94d033e762739d3691c1618ba4f77) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c04f36db Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c04f36db Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c04f36db Branch: refs/heads/ozone-0.2 Commit: c04f36db95d34f57e470234664d0c8caddbb625b Parents: d8668ef Author: Ajay Kumar <[email protected]> Authored: Tue Sep 25 12:50:51 2018 -0700 Committer: Ajay Kumar <[email protected]> Committed: Tue Sep 25 13:11:43 2018 -0700 ---------------------------------------------------------------------- .../StorageContainerLocationProtocol.java | 16 + ...rLocationProtocolClientSideTranslatorPB.java | 42 +++ ...rLocationProtocolServerSideTranslatorPB.java | 32 ++ .../StorageContainerLocationProtocol.proto | 27 ++ .../hdds/scm/server/SCMChillModeManager.java | 8 +- .../scm/server/SCMClientProtocolServer.java | 22 ++ .../scm/server/StorageContainerManager.java | 8 + .../ozone/TestStorageContainerManager.java | 151 --------- .../hadoop/ozone/om/TestScmChillMode.java | 330 +++++++++++++++---- 9 files changed, 414 insertions(+), 222 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index c55062b..e38077f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -133,4 +133,20 @@ public interface StorageContainerLocationProtocol { * @throws IOException */ ScmInfo getScmInfo() throws IOException; + + /** + * Check if SCM is in chill mode. + * + * @return Returns true if SCM is in chill mode else returns false. + * @throws IOException + */ + boolean inChillMode() throws IOException; + + /** + * Force SCM out of Chill mode. + * + * @return returns true if operation is successful. + * @throws IOException + */ + boolean forceExitChillMode() throws IOException; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 0441469..16819e9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -20,8 +20,12 @@ import com.google.common.base.Preconditions; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitChillModeRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitChillModeResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InChillModeRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InChillModeResponseProto; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; @@ -317,6 +321,44 @@ public final class StorageContainerLocationProtocolClientSideTranslatorPB } + /** + * Check if SCM is in chill mode. + * + * @return Returns true if SCM is in chill mode else returns false. + * @throws IOException + */ + @Override + public boolean inChillMode() throws IOException { + InChillModeRequestProto request = + InChillModeRequestProto.getDefaultInstance(); + try { + InChillModeResponseProto resp = rpcProxy.inChillMode( + NULL_RPC_CONTROLLER, request); + return resp.getInChillMode(); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + /** + * Force SCM out of Chill mode. + * + * @return returns true if operation is successful. + * @throws IOException + */ + @Override + public boolean forceExitChillMode() throws IOException { + ForceExitChillModeRequestProto request = + ForceExitChillModeRequestProto.getDefaultInstance(); + try { + ForceExitChillModeResponseProto resp = rpcProxy + .forceExitChillMode(NULL_RPC_CONTROLLER, request); + return resp.getExitedChillMode(); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + @Override public Object getUnderlyingProxyObject() { return rpcProxy; http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerLocationProtocolServerSideTranslatorPB.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerLocationProtocolServerSideTranslatorPB.java index 9175ebf..d2723f0 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -21,6 +21,14 @@ package org.apache.hadoop.ozone.protocolPB; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerLocationProtocolProtos.InChillModeRequestProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerLocationProtocolProtos.InChillModeResponseProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerLocationProtocolProtos.ForceExitChillModeRequestProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerLocationProtocolProtos.ForceExitChillModeResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto; import org.apache.hadoop.hdds.scm.ScmInfo; @@ -218,4 +226,28 @@ public final class StorageContainerLocationProtocolServerSideTranslatorPB } } + + @Override + public InChillModeResponseProto inChillMode( + RpcController controller, + InChillModeRequestProto request) throws ServiceException { + try { + return InChillModeResponseProto.newBuilder() + .setInChillMode(impl.inChillMode()).build(); + } catch (IOException ex) { + throw new ServiceException(ex); + } + } + + @Override + public ForceExitChillModeResponseProto forceExitChillMode( + RpcController controller, ForceExitChillModeRequestProto request) + throws ServiceException { + try { + return ForceExitChillModeResponseProto.newBuilder() + .setExitedChillMode(impl.forceExitChillMode()).build(); + } catch (IOException ex) { + throw new ServiceException(ex); + } + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto b/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto index 68cc35f..fb01d6a 100644 --- a/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto +++ b/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto @@ -150,6 +150,21 @@ message PipelineResponseProto { optional string errorMessage = 3; } + +message InChillModeRequestProto { +} + +message InChillModeResponseProto { + required bool inChillMode = 1; +} + +message ForceExitChillModeRequestProto { +} + +message ForceExitChillModeResponseProto { + required bool exitedChillMode = 1; +} + /** * Protocol used from an HDFS node to StorageContainerManager. See the request * and response messages for details of the RPC calls. @@ -209,4 +224,16 @@ service StorageContainerLocationProtocolService { */ rpc getScmInfo(GetScmInfoRequestProto) returns (GetScmInfoRespsonseProto); + + /** + * Checks if SCM is in ChillMode. + */ + rpc inChillMode(InChillModeRequestProto) + returns (InChillModeResponseProto); + + /** + * Returns information about SCM. + */ + rpc forceExitChillMode(ForceExitChillModeRequestProto) + returns (ForceExitChillModeResponseProto); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java index b35ac1b..3c1cc8f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java @@ -204,9 +204,11 @@ public class SCMChillModeManager implements } } }); - - LOG.info("SCM in chill mode. {} % containers have at least one reported " - + "replica.", (containerWithMinReplicas.get() / maxContainer) * 100); + if(inChillMode.get()) { + LOG.info("SCM in chill mode. {} % containers have at least one" + + " reported replica.", + (containerWithMinReplicas.get() / maxContainer) * 100); + } } @Override http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 3f1943c..66136f1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -302,6 +302,28 @@ public class SCMClientProtocolServer implements } /** + * Check if SCM is in chill mode. + * + * @return Returns true if SCM is in chill mode else returns false. + * @throws IOException + */ + @Override + public boolean inChillMode() throws IOException { + return scm.isInChillMode(); + } + + /** + * Force SCM out of Chill mode. + * + * @return returns true if operation is successful. + * @throws IOException + */ + @Override + public boolean forceExitChillMode() throws IOException { + return scm.exitChillMode(); + } + + /** * Queries a list of Node that match a set of statuses. * * <p>For example, if the nodeStatuses is HEALTHY and RAFT_MEMBER, then http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index b2cbc93..f491d30 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -887,6 +887,14 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl return eventQueue; } + /** + * Force SCM out of chill mode. + */ + public boolean exitChillMode() { + scmChillModeManager.exitChillMode(eventQueue); + return true; + } + @VisibleForTesting public double getCurrentContainerThreshold() { return scmChillModeManager.getCurrentContainerThreshold(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java ---------------------------------------------------------------------- diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java index 94ab6c8..fe53bcc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java @@ -18,13 +18,10 @@ package org.apache.hadoop.ozone; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.util.concurrent.AtomicDouble; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; @@ -35,16 +32,11 @@ import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.scm.ScmConfigKeys; @@ -52,13 +44,9 @@ import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.block.SCMBlockDeletingService; -import org.apache.hadoop.hdds.scm.container.ContainerMapping; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; -import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.NodeManager; -import org.apache.hadoop.hdds.scm.server.SCMChillModeManager; import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer; import org.apache.hadoop.hdds.scm.server.SCMStorage; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; @@ -69,7 +57,6 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -467,142 +454,4 @@ public class TestStorageContainerManager { Assert.assertEquals(clusterId, scmInfo.getClusterId()); Assert.assertEquals(scmId, scmInfo.getScmId()); } - - @Test - public void testSCMChillMode() throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); - MiniOzoneCluster.Builder builder = MiniOzoneCluster.newBuilder(conf) - .setHbInterval(1000) - .setNumDatanodes(3) - .setStartDataNodes(false) - .setHbProcessorInterval(500); - MiniOzoneClusterImpl cluster = (MiniOzoneClusterImpl) builder.build(); - // Test1: Test chill mode when there are no containers in system. - assertTrue(cluster.getStorageContainerManager().isInChillMode()); - cluster.startHddsDatanodes(); - cluster.waitForClusterToBeReady(); - assertFalse(cluster.getStorageContainerManager().isInChillMode()); - - // Test2: Test chill mode when containers are there in system. - // Create {numKeys} random names keys. - TestStorageContainerManagerHelper helper = - new TestStorageContainerManagerHelper(cluster, conf); - Map<String, OmKeyInfo> keyLocations = helper.createKeys(100*2, 4096); - final List<ContainerInfo> containers = cluster.getStorageContainerManager() - .getScmContainerManager().getStateManager().getAllContainers(); - GenericTestUtils.waitFor(() -> { - return containers.size() > 10; - }, 100, 1000); - - // Removing some container to keep them open. - containers.remove(0); - containers.remove(1); - containers.remove(2); - containers.remove(3); - - // Close remaining containers - ContainerMapping mapping = (ContainerMapping) cluster - .getStorageContainerManager().getScmContainerManager(); - containers.forEach(c -> { - try { - mapping.updateContainerState(c.getContainerID(), - HddsProtos.LifeCycleEvent.FINALIZE); - mapping.updateContainerState(c.getContainerID(), - LifeCycleEvent.CLOSE); - } catch (IOException e) { - LOG.info("Failed to change state of open containers.", e); - } - }); - cluster.stop(); - - GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer - .captureLogs(SCMChillModeManager.getLogger()); - logCapturer.clearOutput(); - AtomicReference<MiniOzoneCluster> miniCluster = new AtomicReference<>(); - new Thread(() -> { - try { - miniCluster.set(builder.setStartDataNodes(false).build()); - } catch (IOException e) { - fail("failed"); - } - }).start(); - - StorageContainerManager scm; - GenericTestUtils.waitFor(() -> { - return miniCluster.get() != null; - }, 100, 1000 * 3); - - scm = miniCluster.get().getStorageContainerManager(); - assertTrue(scm.isInChillMode()); - assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode.")); - assertTrue(scm.getCurrentContainerThreshold() == 0); - AtomicDouble curThreshold = new AtomicDouble(); - AtomicDouble lastReportedThreshold = new AtomicDouble(); - for(HddsDatanodeService dn:miniCluster.get().getHddsDatanodes()){ - dn.start(null); - GenericTestUtils.waitFor(() -> { - curThreshold.set(scm.getCurrentContainerThreshold()); - return curThreshold.get() > lastReportedThreshold.get(); - }, 100, 1000 * 5); - lastReportedThreshold.set(curThreshold.get()); - } - double chillModeCutoff = conf - .getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT, - HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT); - assertTrue(scm.getCurrentContainerThreshold() >= chillModeCutoff); - assertTrue(logCapturer.getOutput().contains("SCM exiting chill mode.")); - assertFalse(scm.isInChillMode()); - cluster.shutdown(); - } - - @Test - public void testSCMChillModeRestrictedOp() throws Exception { - - OzoneConfiguration conf = new OzoneConfiguration(); - conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, - OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB); - - MiniOzoneClusterImpl cluster = (MiniOzoneClusterImpl) MiniOzoneCluster - .newBuilder(conf) - .setHbInterval(1000) - .setHbProcessorInterval(500) - .setStartDataNodes(false) - .build(); - - StorageContainerManager scm = cluster.getStorageContainerManager(); - assertTrue(scm.isInChillMode()); - - LambdaTestUtils.intercept(SCMException.class, - "ChillModePrecheck failed for allocateContainer", () -> { - scm.getClientProtocolServer() - .allocateContainer(ReplicationType.STAND_ALONE, - ReplicationFactor.ONE, ""); - }); - - cluster.startHddsDatanodes(); - cluster.waitForClusterToBeReady(); - assertFalse(scm.isInChillMode()); - - TestStorageContainerManagerHelper helper = - new TestStorageContainerManagerHelper(cluster, conf); - helper.createKeys(10, 4096); - SCMClientProtocolServer clientProtocolServer = cluster - .getStorageContainerManager().getClientProtocolServer(); - assertFalse((scm.getClientProtocolServer()).getChillModeStatus()); - final List<ContainerInfo> containers = scm.getScmContainerManager() - .getStateManager().getAllContainers(); - scm.getEventQueue().fireEvent(SCMEvents.CHILL_MODE_STATUS, true); - GenericTestUtils.waitFor(() -> { - return clientProtocolServer.getChillModeStatus(); - }, 50, 1000 * 5); - assertTrue(clientProtocolServer.getChillModeStatus()); - - LambdaTestUtils.intercept(SCMException.class, - "Open container " + containers.get(0).getContainerID() + " " - + "doesn't have enough replicas to service this operation in Chill" - + " mode.", () -> clientProtocolServer - .getContainerWithPipeline(containers.get(0).getContainerID())); - cluster.shutdown(); - } - } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c04f36db/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java ---------------------------------------------------------------------- diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java index 954fa0f..2a6921a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java @@ -17,15 +17,30 @@ */ package org.apache.hadoop.ozone.om; +import com.google.common.util.concurrent.AtomicDouble; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.scm.container.ContainerMapping; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdds.scm.server.SCMChillModeManager; +import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneClusterImpl; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.TestStorageContainerManagerHelper; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; @@ -41,7 +56,11 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; /** @@ -49,10 +68,14 @@ import static org.junit.Assert.fail; */ public class TestScmChillMode { + private final static Logger LOG = LoggerFactory + .getLogger(TestScmChillMode.class); private static MiniOzoneCluster cluster = null; private static MiniOzoneCluster.Builder builder = null; private static OzoneConfiguration conf; private static OzoneManager om; + private static StorageContainerLocationProtocolClientSideTranslatorPB + storageContainerLocationClient; @Rule @@ -77,6 +100,8 @@ public class TestScmChillMode { cluster.startHddsDatanodes(); cluster.waitForClusterToBeReady(); om = cluster.getOzoneManager(); + storageContainerLocationClient = cluster + .getStorageContainerLocationClient(); } /** @@ -85,7 +110,11 @@ public class TestScmChillMode { @After public void shutdown() { if (cluster != null) { - cluster.shutdown(); + try { + cluster.shutdown(); + } catch (Exception e) { + // do nothing. + } } } @@ -93,79 +122,244 @@ public class TestScmChillMode { public void testChillModeOperations() throws Exception { final AtomicReference<MiniOzoneCluster> miniCluster = new AtomicReference<>(); + // Create {numKeys} random names keys. + TestStorageContainerManagerHelper helper = + new TestStorageContainerManagerHelper(cluster, conf); + Map<String, OmKeyInfo> keyLocations = helper.createKeys(100, 4096); + final List<ContainerInfo> containers = cluster + .getStorageContainerManager() + .getScmContainerManager().getStateManager().getAllContainers(); + GenericTestUtils.waitFor(() -> { + return containers.size() > 10; + }, 100, 1000); - try { - // Create {numKeys} random names keys. - TestStorageContainerManagerHelper helper = - new TestStorageContainerManagerHelper(cluster, conf); - Map<String, OmKeyInfo> keyLocations = helper.createKeys(100, 4096); - final List<ContainerInfo> containers = cluster - .getStorageContainerManager() - .getScmContainerManager().getStateManager().getAllContainers(); - GenericTestUtils.waitFor(() -> { - return containers.size() > 10; - }, 100, 1000); - - String volumeName = "volume" + RandomStringUtils.randomNumeric(5); - String bucketName = "bucket" + RandomStringUtils.randomNumeric(5); - String keyName = "key" + RandomStringUtils.randomNumeric(5); - String userName = "user" + RandomStringUtils.randomNumeric(5); - String adminName = "admin" + RandomStringUtils.randomNumeric(5); - OmKeyArgs keyArgs = new OmKeyArgs.Builder() - .setVolumeName(volumeName) - .setBucketName(bucketName) - .setKeyName(keyName) - .setDataSize(1000) - .build(); - OmVolumeArgs volArgs = new OmVolumeArgs.Builder() - .setAdminName(adminName) - .setCreationTime(Time.monotonicNow()) - .setQuotaInBytes(10000) - .setVolume(volumeName) - .setOwnerName(userName) - .build(); - OmBucketInfo bucketInfo = new OmBucketInfo.Builder() - .setBucketName(bucketName) - .setIsVersionEnabled(false) - .setVolumeName(volumeName) - .build(); - om.createVolume(volArgs); - om.createBucket(bucketInfo); - om.openKey(keyArgs); - //om.commitKey(keyArgs, 1); - - cluster.stop(); - - new Thread(() -> { - try { - miniCluster.set(builder.build()); - } catch (IOException e) { - fail("failed"); - } - }).start(); - - StorageContainerManager scm; - GenericTestUtils.waitFor(() -> { - return miniCluster.get() != null; - }, 100, 1000 * 3); + String volumeName = "volume" + RandomStringUtils.randomNumeric(5); + String bucketName = "bucket" + RandomStringUtils.randomNumeric(5); + String keyName = "key" + RandomStringUtils.randomNumeric(5); + String userName = "user" + RandomStringUtils.randomNumeric(5); + String adminName = "admin" + RandomStringUtils.randomNumeric(5); + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + .setKeyName(keyName) + .setDataSize(1000) + .build(); + OmVolumeArgs volArgs = new OmVolumeArgs.Builder() + .setAdminName(adminName) + .setCreationTime(Time.monotonicNow()) + .setQuotaInBytes(10000) + .setVolume(volumeName) + .setOwnerName(userName) + .build(); + OmBucketInfo bucketInfo = new OmBucketInfo.Builder() + .setBucketName(bucketName) + .setIsVersionEnabled(false) + .setVolumeName(volumeName) + .build(); + om.createVolume(volArgs); + om.createBucket(bucketInfo); + om.openKey(keyArgs); + //om.commitKey(keyArgs, 1); + + cluster.stop(); + + new Thread(() -> { + try { + miniCluster.set(builder.build()); + } catch (IOException e) { + fail("failed"); + } + }).start(); - scm = miniCluster.get().getStorageContainerManager(); - Assert.assertTrue(scm.isInChillMode()); + StorageContainerManager scm; + GenericTestUtils.waitFor(() -> { + return miniCluster.get() != null; + }, 100, 1000 * 3); + cluster = miniCluster.get(); - om = miniCluster.get().getOzoneManager(); + scm = cluster.getStorageContainerManager(); + Assert.assertTrue(scm.isInChillMode()); - LambdaTestUtils.intercept(OMException.class, - "ChillModePrecheck failed for allocateBlock", - () -> om.openKey(keyArgs)); + om = miniCluster.get().getOzoneManager(); - } finally { - if (miniCluster.get() != null) { - try { - miniCluster.get().shutdown(); - } catch (Exception e) { - // do nothing. - } + LambdaTestUtils.intercept(OMException.class, + "ChillModePrecheck failed for allocateBlock", + () -> om.openKey(keyArgs)); + } + + /** + * Tests inChillMode & forceExitChillMode api calls. + */ + @Test + public void testIsScmInChillModeAndForceExit() throws Exception { + final AtomicReference<MiniOzoneCluster> miniCluster = + new AtomicReference<>(); + // Test 1: SCM should be out of chill mode. + Assert.assertFalse(storageContainerLocationClient.inChillMode()); + cluster.stop(); + // Restart the cluster with same metadata dir. + new Thread(() -> { + try { + miniCluster.set(builder.build()); + } catch (IOException e) { + Assert.fail("Cluster startup failed."); } + }).start(); + GenericTestUtils.waitFor(() -> { + return miniCluster.get() != null; + }, 10, 1000 * 3); + cluster = miniCluster.get(); + + // Test 2: Scm should be in chill mode as datanodes are not started yet. + storageContainerLocationClient = cluster + .getStorageContainerLocationClient(); + Assert.assertTrue(storageContainerLocationClient.inChillMode()); + // Force scm out of chill mode. + cluster.getStorageContainerManager().getClientProtocolServer() + .forceExitChillMode(); + // Test 3: SCM should be out of chill mode. + GenericTestUtils.waitFor(() -> { + try { + return !cluster.getStorageContainerManager().getClientProtocolServer() + .inChillMode(); + } catch (IOException e) { + Assert.fail("Cluster"); + return false; + } + }, 10, 1000 * 5); + + } + + @Test + public void testSCMChillMode() throws Exception { + MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf) + .setHbInterval(1000) + .setNumDatanodes(3) + .setStartDataNodes(false) + .setHbProcessorInterval(500); + MiniOzoneClusterImpl miniCluster = (MiniOzoneClusterImpl) clusterBuilder + .build(); + // Test1: Test chill mode when there are no containers in system. + assertTrue(miniCluster.getStorageContainerManager().isInChillMode()); + miniCluster.startHddsDatanodes(); + miniCluster.waitForClusterToBeReady(); + assertFalse(miniCluster.getStorageContainerManager().isInChillMode()); + + // Test2: Test chill mode when containers are there in system. + // Create {numKeys} random names keys. + TestStorageContainerManagerHelper helper = + new TestStorageContainerManagerHelper(miniCluster, conf); + Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096); + final List<ContainerInfo> containers = miniCluster + .getStorageContainerManager().getScmContainerManager() + .getStateManager().getAllContainers(); + GenericTestUtils.waitFor(() -> { + return containers.size() > 10; + }, 100, 1000 * 2); + + // Removing some container to keep them open. + containers.remove(0); + containers.remove(1); + containers.remove(2); + containers.remove(3); + + // Close remaining containers + ContainerMapping mapping = (ContainerMapping) miniCluster + .getStorageContainerManager().getScmContainerManager(); + containers.forEach(c -> { + try { + mapping.updateContainerState(c.getContainerID(), + HddsProtos.LifeCycleEvent.FINALIZE); + mapping.updateContainerState(c.getContainerID(), + LifeCycleEvent.CLOSE); + } catch (IOException e) { + LOG.info("Failed to change state of open containers.", e); + } + }); + miniCluster.stop(); + + GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer + .captureLogs(SCMChillModeManager.getLogger()); + logCapturer.clearOutput(); + AtomicReference<MiniOzoneCluster> miniClusterOzone + = new AtomicReference<>(); + new Thread(() -> { + try { + miniClusterOzone.set(clusterBuilder.setStartDataNodes(false).build()); + } catch (IOException e) { + fail("failed"); + } + }).start(); + + StorageContainerManager scm; + GenericTestUtils.waitFor(() -> { + return miniClusterOzone.get() != null; + }, 100, 1000 * 3); + + miniCluster = (MiniOzoneClusterImpl) miniClusterOzone.get(); + + scm = miniCluster.getStorageContainerManager(); + assertTrue(scm.isInChillMode()); + assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode.")); + assertTrue(scm.getCurrentContainerThreshold() == 0); + AtomicDouble curThreshold = new AtomicDouble(); + AtomicDouble lastReportedThreshold = new AtomicDouble(); + for (HddsDatanodeService dn : miniCluster.getHddsDatanodes()) { + dn.start(null); + GenericTestUtils.waitFor(() -> { + curThreshold.set(scm.getCurrentContainerThreshold()); + return curThreshold.get() > lastReportedThreshold.get(); + }, 100, 1000 * 5); + lastReportedThreshold.set(curThreshold.get()); } + cluster = miniCluster; + double chillModeCutoff = conf + .getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT, + HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT); + assertTrue(scm.getCurrentContainerThreshold() >= chillModeCutoff); + assertTrue(logCapturer.getOutput().contains("SCM exiting chill mode.")); + assertFalse(scm.isInChillMode()); + } + + @Test + public void testSCMChillModeRestrictedOp() throws Exception { + conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, + OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB); + cluster.stop(); + cluster = builder.build(); + StorageContainerManager scm = cluster.getStorageContainerManager(); + assertTrue(scm.isInChillMode()); + + LambdaTestUtils.intercept(SCMException.class, + "ChillModePrecheck failed for allocateContainer", () -> { + scm.getClientProtocolServer() + .allocateContainer(ReplicationType.STAND_ALONE, + ReplicationFactor.ONE, ""); + }); + + cluster.startHddsDatanodes(); + cluster.waitForClusterToBeReady(); + assertFalse(scm.isInChillMode()); + + TestStorageContainerManagerHelper helper = + new TestStorageContainerManagerHelper(cluster, conf); + helper.createKeys(10, 4096); + SCMClientProtocolServer clientProtocolServer = cluster + .getStorageContainerManager().getClientProtocolServer(); + assertFalse((scm.getClientProtocolServer()).getChillModeStatus()); + final List<ContainerInfo> containers = scm.getScmContainerManager() + .getStateManager().getAllContainers(); + scm.getEventQueue().fireEvent(SCMEvents.CHILL_MODE_STATUS, true); + GenericTestUtils.waitFor(() -> { + return clientProtocolServer.getChillModeStatus(); + }, 50, 1000 * 5); + assertTrue(clientProtocolServer.getChillModeStatus()); + + LambdaTestUtils.intercept(SCMException.class, + "Open container " + containers.get(0).getContainerID() + " " + + "doesn't have enough replicas to service this operation in Chill" + + " mode.", () -> clientProtocolServer + .getContainerWithPipeline(containers.get(0).getContainerID())); } + } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
