Reidddddd commented on a change in pull request #549: HBASE-22912 [Backport] 
HBASE-22867 to branch-1 to avoid ForkJoinPool to spawn thousands of threads
URL: https://github.com/apache/hbase/pull/549#discussion_r319992307
 
 

 ##########
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
 ##########
 @@ -322,140 +330,171 @@ public boolean getEnabled() {
     T act() throws IOException;
   }
 
-  /**
-   * Attemps to clean up a directory, its subdirectories, and files. Return 
value is true if
-   * everything was deleted. false on partial / total failures.
-   */
-  private final class CleanerTask extends RecursiveTask<Boolean> {
-    private static final long serialVersionUID = -1584635903138015418L;
-    private final Path dir;
-    private final boolean root;
+  private interface Callback<T> {
+    void run(T val);
+  }
 
-    CleanerTask(final FileStatus dir, final boolean root) {
-      this(dir.getPath(), root);
+  private final class AsyncResult<T> {
+
+    private Callback<T> callback;
+    private T result;
+    private boolean resultSet = false;
+
+    AsyncResult(Callback<T> callback) {
+      this.callback = callback;
     }
 
-    CleanerTask(final Path dir, final boolean root) {
-      this.dir = dir;
-      this.root = root;
+    AsyncResult() {
     }
 
-    @Override
-    protected Boolean compute() {
-      LOG.trace("Cleaning under " + dir);
-      List<FileStatus> subDirs;
-      List<FileStatus> tmpFiles;
-      final List<FileStatus> files;
-      try {
-        // if dir doesn't exist, we'll get null back for both of these
-        // which will fall through to succeeding.
-        subDirs = FSUtils.listStatusWithStatusFilter(fs, dir, new 
FileStatusFilter() {
-          @Override
-          public boolean accept(FileStatus f) {
-            return f.isDirectory();
-          }
-        });
-        if (subDirs == null) {
-          subDirs = Collections.emptyList();
+    void set(T result) {
+      synchronized (this) {
+        this.result = result;
+        if (callback != null) {
+          callback.run(result);
         }
-        tmpFiles = FSUtils.listStatusWithStatusFilter(fs, dir, new 
FileStatusFilter() {
-          @Override
-          public boolean accept(FileStatus f) {
-            return f.isFile();
-          }
-        });
-        files = tmpFiles == null ? Collections.<FileStatus>emptyList() : 
tmpFiles;
-      } catch (IOException ioe) {
-        LOG.warn("failed to get FileStatus for contents of '" + dir + "'", 
ioe);
-        return false;
+        // Mark the result set process finished and notify the waiting get 
method.
+        this.resultSet = true;
+        this.notifyAll();
       }
+    }
 
-      boolean allFilesDeleted = true;
-      if (!files.isEmpty()) {
-        allFilesDeleted = deleteAction(new Action<Boolean>() {
-          @Override
-          public Boolean act() throws IOException {
-            return checkAndDeleteFiles(files);
-          }
-        }, "files");
+    synchronized T get() throws Exception {
+      while (!resultSet) {
+        wait();
       }
+      return result;
+    }
+  }
 
-      boolean allSubdirsDeleted = true;
-      if (!subDirs.isEmpty()) {
-        final List<CleanerTask> tasks = 
Lists.newArrayListWithCapacity(subDirs.size());
-        for (FileStatus subdir : subDirs) {
-          CleanerTask task = new CleanerTask(subdir, false);
-          tasks.add(task);
-          task.fork();
+  /**
+   * Attempts to clean up a directory(its subdirectories, and files) in a
+   * {@link java.util.concurrent.ThreadPoolExecutor} concurrently. We can get 
the final result by
+   * calling result.get().
+   * @param dir means the directory we will start to traverse and delete.
+   * @param root means whether it's the root directory to traverse, if true 
then cannot delete it.
+   * @param result {@link AsyncResult<Boolean>} to fetch the result. True 
means the current
+   *          directory has been deleted successfully (for root dir we don't 
need that) and the
+   *          parent will try to delete its own directory if all of the 
children(files and
+   *          sub-directories are included) has been deleted successfully.
+   */
+  private void traverseAndDelete(final Path dir, final boolean root,
+      final AsyncResult<Boolean> result) {
+    try {
+      final Action<Boolean> curDirDeletion = new Action<Boolean>() {
+        @Override
+        public Boolean act() throws IOException {
+          return fs.delete(dir, false);
         }
-        allSubdirsDeleted = deleteAction(new Action<Boolean>() {
-          @Override
-          public Boolean act() throws IOException {
-            return getCleanResult(tasks);
-          }
-        }, "subdirs");
+      };
+
+      // Step.1: List all files under the given directory.
+      List<FileStatus> allPaths = Arrays.asList(fs.listStatus(dir));
+      final List<FileStatus> subDirs = new ArrayList<>();
+      final List<FileStatus> files = new ArrayList<>();
+      for (FileStatus status : allPaths) {
+        if (status.isDirectory()) {
+          subDirs.add(status);
+        } else if (status.isFile()) {
+          files.add(status);
+        }
+      }
+
+      // Step.2: Try to delete all the deletable files.
+      final boolean allFilesDeleted = files.isEmpty() || deleteAction(new 
Action<Boolean>() {
+        @Override
+        public Boolean act() throws IOException {
+          return checkAndDeleteFiles(files);
+        }
+      }, "files", dir);
+
+      // Step.3: Start to traverse and delete the sub-directories.
+      if (subDirs.isEmpty()) {
+        // If no sub-directories, then just try to delete the current dir and 
finish the result.
+        boolean deleted = allFilesDeleted;
+        if (allFilesDeleted && !root) {
+          deleted = deleteAction(curDirDeletion, "dir", dir);
+        }
+        result.set(deleted);
+        return;
       }
 
-      boolean result = allFilesDeleted && allSubdirsDeleted;
-      // if and only if files and subdirs under current dir are deleted 
successfully, and
-      // it is not the root dir, then task will try to delete it.
-      if (result && !root) {
-        result &= deleteAction(new Action<Boolean>() {
+      // Otherwise, there should be some sub-directories. then we will 
register the following
+      // callback in AsyncResult of sub-directory, and once all of the 
sub-directories are traversed
+      // and deleted then the callback will try to delete the current dir and 
finish the result.
+      final AtomicInteger remain = new AtomicInteger(subDirs.size());
+      Callback<Boolean> callback = new Callback<Boolean>() {
 
 Review comment:
   Where does this `callback.get()` get called? Or is there no need to call it? Just 
asking to be sure.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to