This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e97ea36 HDDS-5679. Use more defensive sizeRequired for replication
manager for container replication. (#2585)
e97ea36 is described below
commit e97ea36b596ad66c3476181d9d6f428a0204dbaa
Author: Gui Hecheng <[email protected]>
AuthorDate: Tue Sep 14 12:18:58 2021 +0800
HDDS-5679. Use more defensive sizeRequired for replication manager for
container replication. (#2585)
---
.../hadoop/hdds/scm/container/ReplicationManager.java | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
index 009c850..90bbd97 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hdds.conf.Config;
import org.apache.hadoop.hdds.conf.ConfigGroup;
import org.apache.hadoop.hdds.conf.ConfigType;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
@@ -59,6 +60,7 @@ import static org.apache.hadoop.hdds.protocol.proto.
SCMRatisProtocol.RequestType.MOVE;
import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import
org.apache.hadoop.hdds.scm.container.replication.ReplicationManagerMetrics;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
@@ -226,6 +228,12 @@ public class ReplicationManager implements SCMService {
private int minHealthyForMaintenance;
/**
+ * Current container size as a bound for choosing datanodes with
+ * enough space for a replica.
+ */
+ private long currentContainerSize;
+
+ /**
* SCMService related variables.
After leaving safe mode, replicationMonitor needs to wait for a while
before it really takes effect.
@@ -283,6 +291,10 @@ public class ReplicationManager implements SCMService {
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT_DEFAULT,
TimeUnit.MILLISECONDS);
+ this.currentContainerSize = (long) conf.getStorageSize(
+ ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
+ ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT,
+ StorageUnit.BYTES);
this.metrics = null;
moveScheduler = new MoveSchedulerImpl.Builder()
@@ -1131,13 +1143,18 @@ public class ReplicationManager implements SCMService {
return;
}
+ // We should ensure that the target datanode has enough space
+ // for a complete container to be created, but since the container
+ // size may have been reduced from the original, we should be defensive.
+ final long dataSizeRequired = Math.max(container.getUsedBytes(),
+ currentContainerSize);
final List<DatanodeDetails> excludeList = replicas.stream()
.map(ContainerReplica::getDatanodeDetails)
.collect(Collectors.toList());
excludeList.addAll(replicationInFlight);
final List<DatanodeDetails> selectedDatanodes = containerPlacement
.chooseDatanodes(excludeList, null, replicasNeeded,
- 0, container.getUsedBytes());
+ 0, dataSizeRequired);
if (repDelta > 0) {
LOG.info("Container {} is under replicated. Expected replica count" +
" is {}, but found {}.", id, replicationFactor,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]