This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 4670a5ef8c HDDS-11498. Improve SCM deletion efficiency. (#7249)
4670a5ef8c is described below
commit 4670a5ef8c98ae236d1acbaf93e9609309f88094
Author: slfan1989 <[email protected]>
AuthorDate: Thu Oct 17 13:12:17 2024 +0800
HDDS-11498. Improve SCM deletion efficiency. (#7249)
---
.../commandhandler/DeleteBlocksCommandHandler.java | 8 ++--
.../block/DatanodeDeletedBlockTransactions.java | 3 +-
.../hadoop/hdds/scm/block/DeletedBlockLogImpl.java | 56 ++++++++++++----------
.../hdds/scm/block/SCMBlockDeletingService.java | 3 +-
.../scm/block/ScmBlockDeletingServiceMetrics.java | 34 +++++++++++++
5 files changed, 75 insertions(+), 29 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
index 1a630f8f0b..136c580582 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
@@ -359,10 +359,11 @@ public class DeleteBlocksCommandHandler implements
CommandHandler {
DeletedContainerBlocksSummary summary =
DeletedContainerBlocksSummary.getFrom(containerBlocks);
LOG.info("Summary of deleting container blocks, numOfTransactions={}, "
- + "numOfContainers={}, numOfBlocks={}",
+ + "numOfContainers={}, numOfBlocks={}, commandId={}.",
summary.getNumOfTxs(),
summary.getNumOfContainers(),
- summary.getNumOfBlocks());
+ summary.getNumOfBlocks(),
+ cmd.getCmd().getId());
if (LOG.isDebugEnabled()) {
LOG.debug("Start to delete container blocks, TXIDs={}",
summary.getTxIDSummary());
@@ -389,7 +390,8 @@ public class DeleteBlocksCommandHandler implements
CommandHandler {
LOG.debug("Sending following block deletion ACK to SCM");
for (DeleteBlockTransactionResult result : blockDeletionACK
.getResultsList()) {
- LOG.debug("{} : {}", result.getTxID(), result.getSuccess());
+ LOG.debug("TxId = {} : ContainerId = {} : {}",
+ result.getTxID(), result.getContainerID(),
result.getSuccess());
}
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java
index e485fcc98d..99fd9c7b43 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java
@@ -47,7 +47,8 @@ class DatanodeDeletedBlockTransactions {
blocksDeleted += tx.getLocalIDCount();
if (SCMBlockDeletingService.LOG.isDebugEnabled()) {
SCMBlockDeletingService.LOG
- .debug("Transaction added: {} <- TX({})", dnID, tx.getTxID());
+ .debug("Transaction added: {} <- TX({}), DN {} <- blocksDeleted Add
{}.",
+ dnID, tx.getTxID(), dnID, tx.getLocalIDCount());
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
index 9d5377b9e3..987f293dd1 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatus;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto;
-import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto.DeleteBlockTransactionResult;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
import
org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler.DeleteBlockStatus;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@@ -200,20 +199,6 @@ public class DeletedBlockLogImpl
.build();
}
- private boolean isTransactionFailed(DeleteBlockTransactionResult result) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Got block deletion ACK from datanode, TXIDs={}, " + "success={}",
- result.getTxID(), result.getSuccess());
- }
- if (!result.getSuccess()) {
- LOG.warn("Got failed ACK for TXID={}, prepare to resend the "
- + "TX in next interval", result.getTxID());
- return true;
- }
- return false;
- }
-
@Override
public int getNumOfValidTransactions() throws IOException {
lock.lock();
@@ -300,26 +285,46 @@ public class DeletedBlockLogImpl
.setCount(transactionStatusManager.getOrDefaultRetryCount(
tx.getTxID(), 0))
.build();
+
for (ContainerReplica replica : replicas) {
DatanodeDetails details = replica.getDatanodeDetails();
- if (!dnList.contains(details)) {
- continue;
- }
if (!transactionStatusManager.isDuplication(
details, updatedTxn.getTxID(), commandStatus)) {
transactions.addTransactionToDN(details.getUuid(), updatedTxn);
+ metrics.incrProcessedTransaction();
}
}
}
private Boolean checkInadequateReplica(Set<ContainerReplica> replicas,
- DeletedBlocksTransaction txn) throws ContainerNotFoundException {
+ DeletedBlocksTransaction txn,
+ Set<DatanodeDetails> dnList) throws ContainerNotFoundException {
ContainerInfo containerInfo = containerManager
.getContainer(ContainerID.valueOf(txn.getContainerID()));
ReplicationManager replicationManager =
scmContext.getScm().getReplicationManager();
ContainerHealthResult result = replicationManager
.getContainerReplicationHealth(containerInfo, replicas);
+
+ // We have made an improvement here, and we expect that all replicas
+ // of the Container being sent will be included in the dnList.
+ // This change benefits ACK confirmation and improves deletion speed.
+ // The principle behind it is that
+ // DN can receive the command to delete a certain Container at the same
time and provide
+ // feedback to SCM at roughly the same time.
+ // This avoids the issue of deletion blocking,
+ // where some replicas of a Container are deleted while others do not
receive the delete command.
+ long containerId = txn.getContainerID();
+ for (ContainerReplica replica : replicas) {
+ DatanodeDetails datanodeDetails = replica.getDatanodeDetails();
+ if (!dnList.contains(datanodeDetails)) {
+ DatanodeDetails dnDetail = replica.getDatanodeDetails();
+ LOG.debug("Skip Container = {}, because DN = {} is not in dnList.",
+ containerId, dnDetail.getUuid());
+ return true;
+ }
+ }
+
return result.getHealthState() !=
ContainerHealthResult.HealthState.HEALTHY;
}
@@ -345,6 +350,7 @@ public class DeletedBlockLogImpl
.getCommandStatusByTxId(dnList.stream().
map(DatanodeDetails::getUuid).collect(Collectors.toSet()));
ArrayList<Long> txIDs = new ArrayList<>();
+ metrics.setNumBlockDeletionTransactionDataNodes(dnList.size());
// Here takes block replica count as the threshold to avoid the case
// that part of replicas committed the TXN and recorded in the
// SCMDeletedBlockTransactionStatusManager, while they are counted
@@ -358,23 +364,25 @@ public class DeletedBlockLogImpl
// HDDS-7126. When container is under replicated, it is possible
// that container is deleted, but transactions are not deleted.
if (containerManager.getContainer(id).isDeleted()) {
- LOG.warn("Container: " + id + " was deleted for the " +
- "transaction: " + txn);
+ LOG.warn("Container: {} was deleted for the " +
+ "transaction: {}.", id, txn);
txIDs.add(txn.getTxID());
} else if (txn.getCount() > -1 && txn.getCount() <= maxRetry
&& !containerManager.getContainer(id).isOpen()) {
Set<ContainerReplica> replicas = containerManager
.getContainerReplicas(
ContainerID.valueOf(txn.getContainerID()));
- if (checkInadequateReplica(replicas, txn)) {
+ if (checkInadequateReplica(replicas, txn, dnList)) {
+ metrics.incrSkippedTransaction();
continue;
}
getTransaction(
txn, transactions, dnList, replicas, commandStatus);
+ } else if (txn.getCount() >= maxRetry ||
containerManager.getContainer(id).isOpen()) {
+ metrics.incrSkippedTransaction();
}
} catch (ContainerNotFoundException ex) {
- LOG.warn("Container: " + id + " was not found for the transaction:
"
- + txn);
+ LOG.warn("Container: {} was not found for the transaction: {}.",
id, txn);
txIDs.add(txn.getTxID());
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
index 7271d9dcba..e6fc45cb5e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
@@ -203,9 +203,10 @@ public class SCMBlockDeletingService extends
BackgroundService
}
}
LOG.info("Totally added {} blocks to be deleted for"
- + " {} datanodes, task elapsed time: {}ms",
+ + " {} datanodes / {} totalnodes, task elapsed time: {}ms",
transactions.getBlocksDeleted(),
transactions.getDatanodeTransactionMap().size(),
+ included.size(),
Time.monotonicNow() - startTime);
deletedBlockLog.incrementCount(new ArrayList<>(processedTxIDs));
} catch (NotLeaderException nle) {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java
index 2cadca1d92..6637bd1832 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
/**
* Metrics related to Block Deleting Service running in SCM.
@@ -76,6 +77,15 @@ public final class ScmBlockDeletingServiceMetrics {
@Metric(about = "The number of created txs which are added into DB.")
private MutableCounterLong numBlockDeletionTransactionCreated;
+ @Metric(about = "The number of skipped transactions")
+ private MutableCounterLong numSkippedTransactions;
+
+ @Metric(about = "The number of processed transactions")
+ private MutableCounterLong numProcessedTransactions;
+
+ @Metric(about = "The number of dataNodes of delete transactions.")
+ private MutableGaugeLong numBlockDeletionTransactionDataNodes;
+
private ScmBlockDeletingServiceMetrics() {
}
@@ -130,6 +140,18 @@ public final class ScmBlockDeletingServiceMetrics {
this.numBlockDeletionTransactionCreated.incr(count);
}
+ public void incrSkippedTransaction() {
+ this.numSkippedTransactions.incr();
+ }
+
+ public void incrProcessedTransaction() {
+ this.numProcessedTransactions.incr();
+ }
+
+ public void setNumBlockDeletionTransactionDataNodes(long dataNodes) {
+ this.numBlockDeletionTransactionDataNodes.set(dataNodes);
+ }
+
public long getNumBlockDeletionCommandSent() {
return numBlockDeletionCommandSent.value();
}
@@ -162,6 +184,18 @@ public final class ScmBlockDeletingServiceMetrics {
return numBlockDeletionTransactionCreated.value();
}
+ public long getNumSkippedTransactions() {
+ return numSkippedTransactions.value();
+ }
+
+ public long getNumProcessedTransactions() {
+ return numProcessedTransactions.value();
+ }
+
+ public long getNumBlockDeletionTransactionDataNodes() {
+ return numBlockDeletionTransactionDataNodes.value();
+ }
+
@Override
public String toString() {
StringBuffer buffer = new StringBuffer();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]