This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b3b34150e7 HDDS-8850. ReplicationManager: Add metrics for partial
replication and cluster limit (#4917)
b3b34150e7 is described below
commit b3b34150e791cceff239f195193bb1c779eeeea1
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Sat Jun 17 16:04:51 2023 +0100
HDDS-8850. ReplicationManager: Add metrics for partial replication and
cluster limit (#4917)
---
.../replication/ECUnderReplicationHandler.java | 14 ++-
.../replication/MisReplicationHandler.java | 7 ++
.../replication/RatisUnderReplicationHandler.java | 3 +
.../replication/ReplicationManagerMetrics.java | 104 +++++++++++++++++++++
.../replication/UnhealthyReplicationProcessor.java | 2 +
.../replication/TestECMisReplicationHandler.java | 3 +
.../replication/TestECUnderReplicationHandler.java | 20 ++++
.../replication/TestMisReplicationHandler.java | 7 ++
.../TestRatisUnderReplicationHandler.java | 5 +
.../replication/TestUnderReplicatedProcessor.java | 5 +-
10 files changed, 166 insertions(+), 4 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
index e6aee0de6a..2afd3c53db 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
@@ -64,6 +64,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
private final PlacementPolicy containerPlacement;
private final long currentContainerSize;
private final ReplicationManager replicationManager;
+ private final ReplicationManagerMetrics metrics;
ECUnderReplicationHandler(final PlacementPolicy containerPlacement,
final ConfigurationSource conf, ReplicationManager replicationManager) {
@@ -72,6 +73,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
this.replicationManager = replicationManager;
+ this.metrics = replicationManager.getMetrics();
}
private boolean validatePlacement(List<DatanodeDetails> replicaNodes,
@@ -281,6 +283,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
(ECReplicationConfig)container.getReplicationConfig();
List<Integer> missingIndexes = replicaCount.unavailableIndexes(true);
final int expectedTargetCount = missingIndexes.size();
+ boolean recoveryIsCritical = expectedTargetCount == repConfig.getParity();
if (expectedTargetCount == 0) {
return 0;
}
@@ -307,7 +310,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
// selection allows partial recovery
0 < targetCount && targetCount < expectedTargetCount &&
// recovery is not yet critical
- expectedTargetCount < repConfig.getParity()) {
+ !recoveryIsCritical) {
// check if placement exists when overloaded nodes are not excluded
final List<DatanodeDetails> targetsMaybeOverloaded =
getTargetDatanodes(
@@ -319,7 +322,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
+ " target nodes to be fully reconstructed, but {} selected"
+ " nodes are currently overloaded.",
container.getContainerID(), expectedTargetCount,
overloadedCount);
-
+ metrics.incrECPartialReconstructionSkippedTotal();
throw new InsufficientDatanodesException(expectedTargetCount,
targetCount);
}
@@ -369,6 +372,11 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
LOG.debug("Insufficient nodes were returned from the placement policy"
+
" to fully reconstruct container {}. Requested {} received {}",
container.getContainerID(), expectedTargetCount, targetCount);
+ if (hasOverloaded && recoveryIsCritical) {
+ metrics.incrECPartialReconstructionCriticalTotal();
+ } else {
+ metrics.incrEcPartialReconstructionNoneOverloadedTotal();
+ }
throw new InsufficientDatanodesException(expectedTargetCount,
targetCount);
}
@@ -454,6 +462,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
" to fully replicate the decommission indexes for container {}." +
" Requested {} received {}", container.getContainerID(),
decomIndexes.size(), selectedDatanodes.size());
+ metrics.incrEcPartialReplicationForOutOfServiceReplicasTotal();
throw new InsufficientDatanodesException(decomIndexes.size(),
selectedDatanodes.size());
}
@@ -538,6 +547,7 @@ public class ECUnderReplicationHandler implements
UnhealthyReplicationHandler {
" to fully replicate the maintenance indexes for container {}." +
" Requested {} received {}", container.getContainerID(),
maintIndexes.size(), targets.size());
+ metrics.incrEcPartialReplicationForOutOfServiceReplicasTotal();
throw new InsufficientDatanodesException(maintIndexes.size(),
targets.size());
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java
index d41f80d604..6dcec13c50 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java
@@ -56,6 +56,7 @@ public abstract class MisReplicationHandler implements
private final PlacementPolicy containerPlacement;
private final long currentContainerSize;
private final ReplicationManager replicationManager;
+ private final ReplicationManagerMetrics metrics;
public MisReplicationHandler(
final PlacementPolicy containerPlacement,
@@ -65,6 +66,7 @@ public abstract class MisReplicationHandler implements
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
this.replicationManager = replicationManager;
+ this.metrics = replicationManager.getMetrics();
}
protected ReplicationManager getReplicationManager() {
@@ -164,6 +166,11 @@ public abstract class MisReplicationHandler implements
int found = targetDatanodes.size();
if (found < requiredNodes) {
+ if (container.getReplicationType() == HddsProtos.ReplicationType.EC) {
+ metrics.incrEcPartialReplicationForMisReplicationTotal();
+ } else {
+ metrics.incrPartialReplicationForMisReplicationTotal();
+ }
LOG.warn("Placement Policy {} found only {} nodes for Container: {}," +
" number of required nodes: {}, usedNodes : {}",
containerPlacement.getClass(), found,
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
index ae14c20f47..0e4c4b1d42 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
@@ -55,6 +55,7 @@ public class RatisUnderReplicationHandler
private final PlacementPolicy placementPolicy;
private final long currentContainerSize;
private final ReplicationManager replicationManager;
+ private final ReplicationManagerMetrics metrics;
public RatisUnderReplicationHandler(final PlacementPolicy placementPolicy,
final ConfigurationSource conf,
@@ -64,6 +65,7 @@ public class RatisUnderReplicationHandler
.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
this.replicationManager = replicationManager;
+ this.metrics = replicationManager.getMetrics();
}
/**
@@ -128,6 +130,7 @@ public class RatisUnderReplicationHandler
"additional replicas needed: {}",
containerInfo, targetDatanodes.size(),
replicaCount.additionalReplicaNeeded());
+ metrics.incrPartialReplicationTotal();
throw new InsufficientDatanodesException(
replicaCount.additionalReplicaNeeded(), targetDatanodes.size());
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java
index 219f74ccf0..5c3ee4e29a 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java
@@ -155,6 +155,10 @@ public final class ReplicationManagerMetrics implements
MetricsSource {
" due to the configured limit.")
private MutableCounterLong inflightDeletionSkippedTotal;
+ @Metric("Number of times under replication processing has paused due to" +
+ " reaching the cluster inflight replication limit.")
+ private MutableCounterLong pendingReplicationLimitReachedTotal;
+
private MetricsRegistry registry;
private final ReplicationManager replicationManager;
@@ -183,6 +187,34 @@ public final class ReplicationManagerMetrics implements
MetricsSource {
@Metric("Number of EC replicas scheduled for delete which timed out.")
private MutableCounterLong ecReplicaDeleteTimeoutTotal;
+ @Metric("Number of times partial EC reconstruction was needed due to " +
+ "overloaded nodes, but skipped as there was still sufficient
redundancy.")
+ private MutableCounterLong ecPartialReconstructionSkippedTotal;
+
+ @Metric("Number of times partial EC reconstruction was used due to " +
+ "insufficient nodes available and reconstruction was critical.")
+ private MutableCounterLong ecPartialReconstructionCriticalTotal;
+
+ @Metric("Number of times partial EC reconstruction was used due to " +
+ "insufficient nodes available and with no overloaded nodes.")
+ private MutableCounterLong ecPartialReconstructionNoneOverloadedTotal;
+
+ @Metric("Number of times EC decommissioning or entering maintenance mode " +
+ "replicas were not all replicated due to insufficient nodes available.")
+ private MutableCounterLong ecPartialReplicationForOutOfServiceReplicasTotal;
+
+ @Metric("Number of times partial Ratis replication occurred due to " +
+ "insufficient nodes available.")
+ private MutableCounterLong partialReplicationTotal;
+
+ @Metric("Number of times partial replication occurred to fix a " +
+ "mis-replicated ratis container due to insufficient nodes available.")
+ private MutableCounterLong partialReplicationForMisReplicationTotal;
+
+ @Metric("Number of times partial replication occurred to fix a " +
+ "mis-replicated EC container due to insufficient nodes available.")
+ private MutableCounterLong ecPartialReplicationForMisReplicationTotal;
+
@Metric("NUmber of Reconstruct EC Container commands that could not be sent "
+ "due to the pending commands on the target datanode")
private MutableCounterLong ecReconstructionCmdsDeferredTotal;
@@ -272,6 +304,14 @@ public final class ReplicationManagerMetrics implements
MetricsSource {
ecReconstructionCmdsDeferredTotal.snapshot(builder, all);
deleteContainerCmdsDeferredTotal.snapshot(builder, all);
replicateContainerCmdsDeferredTotal.snapshot(builder, all);
+ pendingReplicationLimitReachedTotal.snapshot(builder, all);
+ ecPartialReconstructionSkippedTotal.snapshot(builder, all);
+ ecPartialReconstructionCriticalTotal.snapshot(builder, all);
+ ecPartialReconstructionNoneOverloadedTotal.snapshot(builder, all);
+ ecPartialReplicationForOutOfServiceReplicasTotal.snapshot(builder, all);
+ partialReplicationTotal.snapshot(builder, all);
+ ecPartialReplicationForMisReplicationTotal.snapshot(builder, all);
+ partialReplicationForMisReplicationTotal.snapshot(builder, all);
}
public void unRegister() {
@@ -505,4 +545,68 @@ public final class ReplicationManagerMetrics implements
MetricsSource {
return replicateContainerCmdsDeferredTotal.value();
}
+ public void incrPendingReplicationLimitReachedTotal() {
+ this.pendingReplicationLimitReachedTotal.incr();
+ }
+
+ public long getPendingReplicationLimitReachedTotal() {
+ return pendingReplicationLimitReachedTotal.value();
+ }
+
+ public long getECPartialReconstructionSkippedTotal() {
+ return ecPartialReconstructionSkippedTotal.value();
+ }
+
+ public void incrECPartialReconstructionSkippedTotal() {
+ this.ecPartialReconstructionSkippedTotal.incr();
+ }
+
+ public long getECPartialReconstructionCriticalTotal() {
+ return ecPartialReconstructionCriticalTotal.value();
+ }
+
+ public void incrECPartialReconstructionCriticalTotal() {
+ this.ecPartialReconstructionCriticalTotal.incr();
+ }
+
+ public long getEcPartialReconstructionNoneOverloadedTotal() {
+ return ecPartialReconstructionNoneOverloadedTotal.value();
+ }
+
+ public void incrEcPartialReconstructionNoneOverloadedTotal() {
+ this.ecPartialReconstructionNoneOverloadedTotal.incr();
+ }
+
+ public long getEcPartialReplicationForOutOfServiceReplicasTotal() {
+ return ecPartialReplicationForOutOfServiceReplicasTotal.value();
+ }
+
+ public void incrEcPartialReplicationForOutOfServiceReplicasTotal() {
+ this.ecPartialReplicationForOutOfServiceReplicasTotal.incr();
+ }
+
+ public long getPartialReplicationTotal() {
+ return partialReplicationTotal.value();
+ }
+
+ public void incrPartialReplicationTotal() {
+ this.partialReplicationTotal.incr();
+ }
+
+ public void incrEcPartialReplicationForMisReplicationTotal() {
+ this.ecPartialReplicationForMisReplicationTotal.incr();
+ }
+
+ public long getEcPartialReplicationForMisReplicationTotal() {
+ return this.ecPartialReplicationForMisReplicationTotal.value();
+ }
+
+ public void incrPartialReplicationForMisReplicationTotal() {
+ this.partialReplicationForMisReplicationTotal.incr();
+ }
+
+ public long getPartialReplicationForMisReplicationTotal() {
+ return this.partialReplicationForMisReplicationTotal.value();
+ }
+
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java
index 1f04edd2ee..13ed8f87e3 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java
@@ -103,6 +103,8 @@ public abstract class
UnhealthyReplicationProcessor<HealthResult extends
inflightOperationLimitReached(replicationManager, inflightLimit)) {
LOG.info("The maximum number of pending replicas ({}) are scheduled. "
+
"Ending the iteration.", inflightLimit);
+ replicationManager
+ .getMetrics().incrPendingReplicationLimitReachedTotal();
break;
}
HealthResult healthResult =
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java
index c68b1cb8c5..5022d9ca5c 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java
@@ -45,6 +45,7 @@ import java.util.Set;
import static java.util.Collections.singletonList;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
@@ -216,6 +217,8 @@ public class TestECMisReplicationHandler extends
TestMisReplicationHandler {
assertThrows(InsufficientDatanodesException.class,
() -> testMisReplication(availableReplicas, Collections.emptyList(),
0, 2, 1));
+ assertEquals(1,
+ getMetrics().getEcPartialReplicationForMisReplicationTotal());
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
index 6fc6b1c0eb..be95bcee60 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
@@ -110,6 +110,7 @@ public class TestECUnderReplicationHandler {
private ContainerInfo container;
private NodeManager nodeManager;
private ReplicationManager replicationManager;
+ private ReplicationManagerMetrics metrics;
private OzoneConfiguration conf;
private PlacementPolicy policy;
private static final int DATA = 3;
@@ -137,6 +138,8 @@ public class TestECUnderReplicationHandler {
new ReplicationManager.ReplicationManagerConfiguration();
when(replicationManager.getConfig())
.thenReturn(rmConf);
+ metrics = ReplicationManagerMetrics.create(replicationManager);
+ when(replicationManager.getMetrics()).thenReturn(metrics);
when(replicationManager.getNodeStatus(any(DatanodeDetails.class)))
.thenAnswer(invocation -> {
@@ -200,6 +203,7 @@ public class TestECUnderReplicationHandler {
assertEquals(e.getRequiredNodes() - excluded.size(),
e.getAvailableNodes());
verify(replicationManager, never())
.sendThrottledReconstructionCommand(any(), any());
+ assertEquals(1, metrics.getECPartialReconstructionSkippedTotal());
}
private static UnderReplicatedHealthResult mockUnderReplicated(
@@ -258,6 +262,7 @@ public class TestECUnderReplicationHandler {
assertEquals(e.getRequiredNodes() - excluded.size(),
e.getAvailableNodes());
verify(replicationManager, times(1))
.sendThrottledReconstructionCommand(any(), any());
+ assertEquals(1, metrics.getECPartialReconstructionCriticalTotal());
}
@Test
@@ -711,6 +716,7 @@ public class TestECUnderReplicationHandler {
ReconstructECContainersCommand cmd = (ReconstructECContainersCommand)
commandsSent.iterator().next().getValue();
assertEquals(1, cmd.getTargetDatanodes().size());
+ assertEquals(1, metrics.getEcPartialReconstructionNoneOverloadedTotal());
}
@Test
@@ -759,6 +765,8 @@ public class TestECUnderReplicationHandler {
SCMCommand<?> cmd = commandsSent.iterator().next().getValue();
assertEquals(
SCMCommandProto.Type.replicateContainerCommand, cmd.getType());
+ assertEquals(1,
+ metrics.getEcPartialReplicationForOutOfServiceReplicasTotal());
}
@Test
@@ -782,6 +790,11 @@ public class TestECUnderReplicationHandler {
SCMCommand<?> cmd = commandsSent.iterator().next().getValue();
assertEquals(
SCMCommandProto.Type.replicateContainerCommand, cmd.getType());
+ // The partial recovery here is due to overloaded nodes, not insufficient
+ // nodes. The "deferred" metric should be updated when all sources are
+ // overloaded.
+ assertEquals(0,
+ metrics.getEcPartialReplicationForOutOfServiceReplicasTotal());
}
@Test
@@ -807,6 +820,8 @@ public class TestECUnderReplicationHandler {
SCMCommand<?> cmd = commandsSent.iterator().next().getValue();
assertEquals(
SCMCommandProto.Type.replicateContainerCommand, cmd.getType());
+ assertEquals(1,
+ metrics.getEcPartialReplicationForOutOfServiceReplicasTotal());
}
@Test
@@ -831,6 +846,11 @@ public class TestECUnderReplicationHandler {
SCMCommand<?> cmd = commandsSent.iterator().next().getValue();
assertEquals(
SCMCommandProto.Type.replicateContainerCommand, cmd.getType());
+ // The partial recovery here is due to overloaded nodes, not insufficient
+ // nodes. The "deferred" metric should be updated when all sources are
+ // overloaded.
+ assertEquals(0,
+ metrics.getEcPartialReplicationForOutOfServiceReplicasTotal());
}
@Test
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java
index eb5f068e46..9c2874740f 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java
@@ -72,6 +72,7 @@ public abstract class TestMisReplicationHandler {
private Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent;
private final AtomicBoolean throwThrottledException =
new AtomicBoolean(false);
+ private ReplicationManagerMetrics metrics;
protected void setup(ReplicationConfig repConfig)
throws NodeNotFoundException, CommandTargetOverloadedException,
@@ -89,6 +90,8 @@ public abstract class TestMisReplicationHandler {
conf.getObject(ReplicationManagerConfiguration.class);
Mockito.when(replicationManager.getConfig())
.thenReturn(rmConf);
+ metrics = ReplicationManagerMetrics.create(replicationManager);
+ Mockito.when(replicationManager.getMetrics()).thenReturn(metrics);
commandsSent = new HashSet<>();
ReplicationTestUtil.mockRMSendDatanodeCommand(
@@ -107,6 +110,10 @@ public abstract class TestMisReplicationHandler {
return replicationManager;
}
+ protected ReplicationManagerMetrics getMetrics() {
+ return metrics;
+ }
+
protected void setThrowThrottledException(boolean showThrow) {
throwThrottledException.set(showThrow);
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
index fe0caf8810..e2cdea09cc 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
@@ -76,6 +76,7 @@ public class TestRatisUnderReplicationHandler {
private PlacementPolicy policy;
private ReplicationManager replicationManager;
private Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent;
+ private ReplicationManagerMetrics metrics;
@Before
public void setup() throws NodeNotFoundException,
@@ -93,6 +94,8 @@ public class TestRatisUnderReplicationHandler {
Mockito.when(replicationManager.getConfig())
.thenReturn(ozoneConfiguration.getObject(
ReplicationManagerConfiguration.class));
+ metrics = ReplicationManagerMetrics.create(replicationManager);
+ Mockito.when(replicationManager.getMetrics()).thenReturn(metrics);
/*
Return NodeStatus with NodeOperationalState as specified in
@@ -220,6 +223,7 @@ public class TestRatisUnderReplicationHandler {
Assert.assertThrows(IOException.class,
() -> handler.processAndSendCommands(replicas,
Collections.emptyList(), getUnderReplicatedHealthResult(), 2));
+ Assert.assertEquals(0, metrics.getPartialReplicationTotal());
}
@Test
@@ -239,6 +243,7 @@ public class TestRatisUnderReplicationHandler {
// One command should be sent to the replication manager as we could only
// fine one node rather than two.
Assert.assertEquals(1, commandsSent.size());
+ Assert.assertEquals(1, metrics.getPartialReplicationTotal());
}
@Test
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestUnderReplicatedProcessor.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestUnderReplicatedProcessor.java
index b604ae3e2b..1b01482be4 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestUnderReplicatedProcessor.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestUnderReplicatedProcessor.java
@@ -44,6 +44,7 @@ public class TestUnderReplicatedProcessor {
private ECReplicationConfig repConfig;
private UnderReplicatedProcessor underReplicatedProcessor;
private ReplicationQueue queue;
+ private ReplicationManagerMetrics rmMetrics;
@Before
public void setup() {
@@ -57,8 +58,7 @@ public class TestUnderReplicatedProcessor {
repConfig = new ECReplicationConfig(3, 2);
Mockito.when(replicationManager.shouldRun()).thenReturn(true);
Mockito.when(replicationManager.getConfig()).thenReturn(rmConf);
- ReplicationManagerMetrics rmMetrics =
- ReplicationManagerMetrics.create(replicationManager);
+ rmMetrics = ReplicationManagerMetrics.create(replicationManager);
Mockito.when(replicationManager.getMetrics())
.thenReturn(rmMetrics);
Mockito.when(replicationManager.getReplicationInFlightLimit())
@@ -132,6 +132,7 @@ public class TestUnderReplicatedProcessor {
// We should have processed the message now
Mockito.verify(replicationManager, Mockito.times(1))
.processUnderReplicatedContainer(any());
+ assertEquals(1, rmMetrics.getPendingReplicationLimitReachedTotal());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]