[
https://issues.apache.org/jira/browse/HDFS-16043?focusedWorklogId=612281&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-612281
]
ASF GitHub Bot logged work on HDFS-16043:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 19/Jun/21 10:25
Start Date: 19/Jun/21 10:25
Worklog Time Spent: 10m
Work Description: Hexiaoqiao commented on a change in pull request #3063:
URL: https://github.com/apache/hadoop/pull/3063#discussion_r654778036
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
##########
@@ -4964,6 +4990,73 @@ public long getLastRedundancyMonitorTS() {
return lastRedundancyCycleTS.get();
}
+ /**
+ * Periodically deletes the marked block.
+ */
+ private class MarkedDeleteBlockScrubber implements Runnable {
+ private Iterator<BlockInfo> toDeleteIterator = null;
+ private boolean isSleep;
+
+ private void toRemove(long time) {
+ // Reentrant write lock, Release the lock when the remove is
+ // complete
+ if (checkToDeleteIterator()) {
+ namesystem.writeLock();
+ try {
+ while (toDeleteIterator.hasNext()) {
+ removeBlock(toDeleteIterator.next());
+ if (Time.now() - time > deleteBlockLockTimeMs) {
+ LOG.info("Clear markedDeleteQueue over " + deleteBlockLockTimeMs
+ + " millisecond to release the write lock");
+ isSleep = true;
+ break;
+ }
+ }
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+ }
+
+ private boolean checkToDeleteIterator() {
+ return toDeleteIterator != null && toDeleteIterator.hasNext();
+ }
+
+ @Override
+ public void run() {
+ LOG.info("Start MarkedDeleteBlockScrubber thread");
+ while (namesystem.isRunning()) {
+ if (!markedDeleteQueue.isEmpty() || checkToDeleteIterator()) {
+ namesystem.writeLock();
+ try {
+ NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
+ metrics.setDeleteBlocksQueued(markedDeleteQueue.size());
+ isSleep = false;
+ long startTime = Time.now();
+ toRemove(startTime);
+ while (!markedDeleteQueue.isEmpty()) {
+ List<BlockInfo> markedDeleteList = markedDeleteQueue.poll();
+ if (markedDeleteList != null) {
+ toDeleteIterator = markedDeleteList.listIterator();
+ }
+ toRemove(startTime);
+ if (isSleep) {
+ break;
+ }
+ }
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+ try {
+ TimeUnit.MILLISECONDS.sleep(deleteBlockUnlockIntervalTimeMs);
Review comment:
Why not use `Thread.sleep` directly?
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
##########
@@ -1010,7 +1010,8 @@ public Boolean get() {
@Test
public void testTotalBlocksMetrics() throws Exception {
MiniDFSCluster cluster = null;
- FSNamesystem namesystem = null;
+ FSNamesystem activeNn = null;
+ FSNamesystem backUpNn = null;
Review comment:
Renaming `backUpNn` to `standbyNn` would be more appropriate here.
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
##########
@@ -4964,6 +4990,73 @@ public long getLastRedundancyMonitorTS() {
return lastRedundancyCycleTS.get();
}
+ /**
+ * Periodically deletes the marked block.
+ */
+ private class MarkedDeleteBlockScrubber implements Runnable {
+ private Iterator<BlockInfo> toDeleteIterator = null;
+ private boolean isSleep;
+
+ private void toRemove(long time) {
+ // Reentrant write lock, Release the lock when the remove is
+ // complete
+ if (checkToDeleteIterator()) {
+ namesystem.writeLock();
+ try {
+ while (toDeleteIterator.hasNext()) {
+ removeBlock(toDeleteIterator.next());
+ if (Time.now() - time > deleteBlockLockTimeMs) {
+ LOG.info("Clear markedDeleteQueue over " + deleteBlockLockTimeMs
Review comment:
It is better to do this logging outside of the global lock.
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
##########
@@ -4964,6 +4990,73 @@ public long getLastRedundancyMonitorTS() {
return lastRedundancyCycleTS.get();
}
+ /**
+ * Periodically deletes the marked block.
+ */
+ private class MarkedDeleteBlockScrubber implements Runnable {
+ private Iterator<BlockInfo> toDeleteIterator = null;
+ private boolean isSleep;
+
+ private void toRemove(long time) {
+ // Reentrant write lock, Release the lock when the remove is
+ // complete
+ if (checkToDeleteIterator()) {
+ namesystem.writeLock();
+ try {
+ while (toDeleteIterator.hasNext()) {
+ removeBlock(toDeleteIterator.next());
+ if (Time.now() - time > deleteBlockLockTimeMs) {
+ LOG.info("Clear markedDeleteQueue over " + deleteBlockLockTimeMs
+ + " millisecond to release the write lock");
+ isSleep = true;
+ break;
+ }
+ }
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+ }
+
+ private boolean checkToDeleteIterator() {
+ return toDeleteIterator != null && toDeleteIterator.hasNext();
+ }
+
+ @Override
+ public void run() {
+ LOG.info("Start MarkedDeleteBlockScrubber thread");
+ while (namesystem.isRunning()) {
+ if (!markedDeleteQueue.isEmpty() || checkToDeleteIterator()) {
+ namesystem.writeLock();
+ try {
+ NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
+ metrics.setDeleteBlocksQueued(markedDeleteQueue.size());
+ isSleep = false;
+ long startTime = Time.now();
+ toRemove(startTime);
+ while (!markedDeleteQueue.isEmpty()) {
+ List<BlockInfo> markedDeleteList = markedDeleteQueue.poll();
+ if (markedDeleteList != null) {
+ toDeleteIterator = markedDeleteList.listIterator();
+ }
+ toRemove(startTime);
+ if (isSleep) {
+ break;
+ }
+ }
+ } finally {
+ namesystem.writeUnlock();
+ }
+ }
+ try {
+ TimeUnit.MILLISECONDS.sleep(deleteBlockUnlockIntervalTimeMs);
+ } catch (InterruptedException e) {
+ LOG.info("Stopping MarkedDeleteBlockScrubber.");
Review comment:
Terminating the current process here would be safer, IMO.
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java
##########
@@ -671,10 +676,10 @@ public void testTruncateFailure() throws IOException {
"File does not exist", expected);
}
-
+
fs.setPermission(p, FsPermission.createImmutable((short)0664));
{
- final UserGroupInformation fooUgi =
Review comment:
There seem to be no real changes here (likewise at L758, L762, L814, L867,
etc.)? I suggest leaving these lines as they were if there are no explicit changes.
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
##########
@@ -5982,6 +5982,22 @@
</description>
</property>
+ <property>
Review comment:
I would prefer to hard-code this value; it is not necessary to offer the
parameter to end users to tune.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 612281)
Time Spent: 50m (was: 40m)
> HDFS : Delete performance optimization
> --------------------------------------
>
> Key: HDFS-16043
> URL: https://issues.apache.org/jira/browse/HDFS-16043
> Project: Hadoop HDFS
> Issue Type: Improvement
> Components: hdfs, namanode
> Affects Versions: 3.4.0
> Reporter: Xiangyi Zhu
> Assignee: Xiangyi Zhu
> Priority: Major
> Labels: pull-request-available
> Attachments: 20210527-after.svg, 20210527-before.svg
>
> Time Spent: 50m
> Remaining Estimate: 0h
>
> Deleting a large directory caused the NN to hold the lock for too long,
> which caused our NameNode to be killed by ZKFC.
> Through the flame graph, it is found that the main time-consuming work is
> the QuotaCount calculation during removeBlocks(toRemovedBlocks) and inode
> deletion, and that removeBlocks(toRemovedBlocks) takes the higher proportion of time.
> h3. solution:
> 1. RemoveBlocks is processed asynchronously. A thread is started in the
> BlockManager to process the deleted blocks and control the lock time.
> 2. Optimize the QuotaCount calculation; this is similar to the optimization
> done in HDFS-16000.
> h3. Comparison before and after optimization:
> Test: delete 1000w (10 million) inodes and 1000w (10 million) blocks.
> *before:*
> remove inode elapsed time: 7691 ms
> remove block elapsed time :11107 ms
> *after:*
> remove inode elapsed time: 4149 ms
> remove block elapsed time :0 ms
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]