This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase-filesystem.git

commit 21e1e920b7f15dbb1bfb8b261d39d303ea97f3e2
Author: Josh Elser <els...@apache.org>
AuthorDate: Wed Nov 10 13:13:32 2021 -0500

    HBASE-26437 Clean up the znodes for the src after a rename.
    
    HBOSS was orphaning znodes from the src of a path which is renamed. Over
    time, this will result in a very large usage of ZK due to HBOSS.
    
    Add some logging to dump the contents of ZK
    
    Signed-off-by: Wellington Chevreuil <wchevre...@apache.org>
    
    Closes #29
---
 .../hbase/oss/HBaseObjectStoreSemantics.java       |  20 ++-
 .../hadoop/hbase/oss/sync/TreeLockManager.java     |  26 ++-
 .../hadoop/hbase/oss/sync/ZKTreeLockManager.java   |  18 +-
 .../hbase/oss/HBaseObjectStoreSemanticsTest.java   |   4 +
 .../apache/hadoop/hbase/oss/TestZNodeCleanup.java  | 188 +++++++++++++++++++++
 .../hadoop/hbase/oss/sync/TestTreeLockManager.java |   2 +-
 hbase-oss/src/test/resources/log4j.properties      |   2 +
 7 files changed, 246 insertions(+), 14 deletions(-)

diff --git 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemantics.java
 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemantics.java
index 7669181..65eeb16 100644
--- 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemantics.java
+++ 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemantics.java
@@ -24,6 +24,8 @@ import java.net.URI;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.ContentSummary;
@@ -452,13 +454,21 @@ public class HBaseObjectStoreSemantics extends 
FilterFileSystem {
     long startTime = System.currentTimeMillis();
     long lockAcquiredTime = startTime;
     long doneTime = startTime;
-    try (AutoLock l = sync.lockRename(src, dst)) {
+    // Future to pass into the AutoLock so it knows if it should clean up.
+    final CompletableFuture<Boolean> renameResult = new CompletableFuture<>();
+    try (AutoLock l = sync.lockRename(src, dst, renameResult)) {
       lockAcquiredTime = System.currentTimeMillis();
       metrics.updateAcquireRenameLockHisto(lockAcquiredTime- startTime);
-      boolean result = fs.rename(src, dst);
-      doneTime = System.currentTimeMillis();
-      metrics.updateRenameFsOperationHisto(doneTime - lockAcquiredTime);
-      return result;
+      // Defaulting to false in the case that fs.rename throws an exception
+      boolean result = false;
+      try {
+        result = fs.rename(src, dst);
+        return result;
+      } finally {
+        renameResult.complete(result);
+        doneTime = System.currentTimeMillis();
+        metrics.updateRenameFsOperationHisto(doneTime - lockAcquiredTime);
+      }
     }
     finally {
       long releasedLocksTime = System.currentTimeMillis();
diff --git 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/TreeLockManager.java 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/TreeLockManager.java
index 22209e4..9506262 100644
--- 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/TreeLockManager.java
+++ 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/TreeLockManager.java
@@ -21,7 +21,10 @@ package org.apache.hadoop.hbase.oss.sync;
 import java.io.IOException;
 import java.net.URI;
 import java.util.Arrays;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
@@ -471,7 +474,7 @@ public abstract class TreeLockManager {
    * @return AutoCloseable to release both paths
    * @throws IOException at any possible IO failure.
    */
-  public AutoLock lockRename(Path rawSrc, Path rawDst) throws IOException {
+  public AutoLock lockRename(Path rawSrc, Path rawDst, Future<Boolean> 
successFuture) throws IOException {
     Path src = norm(rawSrc);
     Path dst = norm(rawDst);
     LOG.debug("About to lock for rename: from {} to {}", src, dst);
@@ -484,8 +487,29 @@ public abstract class TreeLockManager {
     }
     return new AutoLock() {
       public void close() throws IOException {
+        // We have to clean up the src znodes:
+        //   1. If the rename was successful
+        //   2. While we still hold the write lock
         LOG.debug("About to unlock after rename: from {} to {}", src, dst);
         try {
+          Boolean renameSuccess;
+          try {
+            renameSuccess = successFuture.get();
+          } catch (InterruptedException | ExecutionException e) {
+            LOG.warn("Unable to determine if filesystem rename was successful. 
Assuming it failed.", e);
+            renameSuccess = false;
+          }
+          if (renameSuccess != null && renameSuccess.booleanValue()) {
+            // Tricky... HBossContract tests tough things like
+            //   `rename("/", "/somethingelse")`
+            // This means we grabbed write locks on
+            //    /               (src)
+            //    /somethingelse  (dst)
+            // Thus, we can't safely delete the znodes for src as it may
+            // then also affect the (held) lock on the dst. This is why
+            // we only delete the znodes on success.
+            recursiveDelete(src);
+          }
           writeUnlock(src);
         } finally {
           writeUnlock(dst);
diff --git 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/ZKTreeLockManager.java
 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/ZKTreeLockManager.java
index 549d4c1..16bd57e 100644
--- 
a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/ZKTreeLockManager.java
+++ 
b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/ZKTreeLockManager.java
@@ -68,7 +68,7 @@ public class ZKTreeLockManager extends TreeLockManager {
     root = "/hboss";
   }
 
-  private static final String lockSubZnode = ".hboss-lock-znode";
+  public static final String LOCK_SUB_ZNODE = ".hboss-lock-znode";
 
   private Map<Path,InterProcessReadWriteLock> lockCache = new HashMap<>();
 
@@ -161,7 +161,7 @@ public class ZKTreeLockManager extends TreeLockManager {
       get(p).writeLock().release();
     } catch(IllegalMonitorStateException e) {
       // Reentrant locks might be acquired multiple times
-      LOG.error("Tried to release unacquired write lock: {}", p);
+      LOG.error("Tried to release unacquired write lock: {}", p, e);
       throw e;
     } catch (Exception e) {
       throw new IOException("Exception during write unlocking of path " + p, 
e);
@@ -226,10 +226,11 @@ public class ZKTreeLockManager extends TreeLockManager {
 
   @Override
   protected void recursiveDelete(Path p) throws IOException {
+    LOG.debug("Removing all mutex and znodes for paths beneath {}", p);
     try {
       ZKPaths.deleteChildren(curator.getZookeeperClient().getZooKeeper(),
             p.toString(), !p.isRoot());
-      // Before this method is called, we have a guarantee that 
+      // Before this method is called, we have a guarantee that
       //   1. There are no write locks above or below us
       //   2. There are no read locks below us
       // As such, we can just remove locks beneath us as we find them.
@@ -257,7 +258,7 @@ public class ZKTreeLockManager extends TreeLockManager {
    *
    * Specifically, this method will return true if the given path is a 
sub-directory
    * of the parent or a file in the directory represented by the parent. This 
method
-   * returns false if the parent and the given path are the same. 
+   * returns false if the parent and the given path are the same.
    */
   boolean isBeneath(Path parent, Path given) {
     if (parent.equals(given)) {
@@ -287,7 +288,7 @@ public class ZKTreeLockManager extends TreeLockManager {
       if (level < maxLevel) {
         List<String> children = curator.getChildren().forPath(p.toString());
         for (String child : children) {
-          if (child.equals(lockSubZnode)) {
+          if (child.equals(LOCK_SUB_ZNODE)) {
             continue;
           }
           if (writeLockBelow(new Path(p, child), level+1, maxLevel)) {
@@ -312,7 +313,7 @@ public class ZKTreeLockManager extends TreeLockManager {
       if (level < maxLevel) {
         List<String> children = curator.getChildren().forPath(p.toString());
         for (String child : children) {
-          if (child.equals(lockSubZnode)) {
+          if (child.equals(LOCK_SUB_ZNODE)) {
             continue;
           }
           if (readLockBelow(new Path(p, child), level+1, maxLevel)) {
@@ -357,6 +358,9 @@ public class ZKTreeLockManager extends TreeLockManager {
     StringBuilder sb = new StringBuilder();
     Map<Path,InterProcessReadWriteLock> cache = getUnmodifiableCache();
     for (Entry<Path,InterProcessReadWriteLock> entry : cache.entrySet()) {
+      if (sb.length() > 0) {
+        sb.append("\n");
+      }
       
sb.append(entry.getKey()).append("=").append(describeLock(entry.getValue()));
     }
     return sb.toString();
@@ -382,7 +386,7 @@ public class ZKTreeLockManager extends TreeLockManager {
 
   private synchronized InterProcessReadWriteLock get(Path path) throws 
IOException {
     if (!lockCache.containsKey(path)) {
-      String zkPath = new Path(path, lockSubZnode).toString();
+      String zkPath = new Path(path, LOCK_SUB_ZNODE).toString();
       try {
         ZKPaths.mkdirs(curator.getZookeeperClient().getZooKeeper(), zkPath, 
true);
       } catch (KeeperException.NodeExistsException e) {
diff --git 
a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemanticsTest.java
 
b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemanticsTest.java
index e11647b..a2c83f0 100644
--- 
a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemanticsTest.java
+++ 
b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/HBaseObjectStoreSemanticsTest.java
@@ -43,6 +43,10 @@ public class HBaseObjectStoreSemanticsTest {
     return TestUtils.testPath(hboss, path);
   }
 
+  public TreeLockManager getLockManager() {
+    return sync;
+  }
+
   @Before
   public void setup() throws Exception {
     Configuration conf = new Configuration();
diff --git 
a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/TestZNodeCleanup.java 
b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/TestZNodeCleanup.java
new file mode 100644
index 0000000..bf1dec6
--- /dev/null
+++ b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/TestZNodeCleanup.java
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.oss;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.oss.sync.ZKTreeLockManager;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
+import org.apache.zookeeper.ZooKeeper;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Validates that when some entry in the object store is deleted, the 
corresponding
+ * data in ZooKeeper is also deleted.
+ */
+public class TestZNodeCleanup extends HBaseObjectStoreSemanticsTest {
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestZNodeCleanup.class);
+
+  private ZKTreeLockManager lockManager;
+  private ZooKeeper zk;
+
+  @Before
+  public void setup() throws Exception {
+    super.setup();
+    assumeTrue("Lock manager is a " + getLockManager().getClass(),
+        getLockManager() instanceof ZKTreeLockManager);
+    lockManager = (ZKTreeLockManager) getLockManager();
+    Configuration conf = hboss.getConf();
+    LOG.info("Waiting for ZK client to connect");
+    // TODO should wait for ZK to connect
+    final CountDownLatch latch = new CountDownLatch(1);
+    // Root the ZK connection beneath /hboss
+    zk = new ZooKeeper(conf.get(Constants.ZK_CONN_STRING) + "/hboss", 60000, 
new Watcher() {
+      @Override
+      public void process(WatchedEvent event) {
+        LOG.info("Caught event " + event);
+        if (event.getState() == KeeperState.SyncConnected) {
+          latch.countDown();
+        }
+      }
+    });
+    latch.await();
+    LOG.info("ZooKeeper client is connected");
+  }
+
+  @After
+  public void teardown() throws Exception {
+    String zkRoot = lockManager.norm(TestUtils.testPathRoot(hboss)).toString();
+    LOG.info("Dumping contents of ZooKeeper after test from {}", zkRoot);
+    printZkBFS(zkRoot);
+    if (zk != null) {
+      zk.close();
+      zk = null;
+    }
+  }
+
+  void printZkBFS(String path) throws Exception {
+    LOG.info(path);
+    List<String> children = zk.getChildren(path, false);
+    for (String child : children) {
+      printZkBFS(path + "/" + child);
+    }
+  }
+
+  String getZNodeFromPath(Path p) {
+    return Path.getPathWithoutSchemeAndAuthority(p).toString().substring(1);
+  }
+
+  void validatePathInZk(Path zkPath) throws Exception {
+    assertNotNull(zkPath + " did not exist in ZK", 
zk.exists(zkPath.toString(), false));
+    String hbossLock = new Path(zkPath, 
ZKTreeLockManager.LOCK_SUB_ZNODE).toString();
+    assertNotNull(hbossLock + " did not exist in ZK", zk.exists(hbossLock, 
false));
+  }
+
+  void validatePathNotInZk(Path zkPath) throws Exception {
+    assertNull(zkPath + " incorrectly exists in ZK.", 
zk.exists(zkPath.toString(), false));
+  }
+
+  @Test
+  public void testRename() throws Exception {
+    // Rename src to dest and validate that the znode for src is cleaned up.
+    final Path src = TestUtils.testPath(hboss, "src");
+    final Path dest = TestUtils.testPath(hboss, "dest");
+    assertTrue(hboss.mkdirs(src));
+    // The src znode should exist after creating the dir in S3
+    validatePathInZk(lockManager.norm(src));
+    // `mv src dest`
+    assertTrue(hboss.rename(src, dest));
+    // We should have a znode for dest (we just locked it)
+    validatePathInZk(lockManager.norm(dest));
+    // We should no longer have a znode for src (we effectively deleted it 
from S3)
+    validatePathNotInZk(lockManager.norm(src));
+  }
+
+  @Test
+  public void testFailedRename() throws Exception {
+    // Rename src to dest and validate that the znode for src is cleaned up.
+    final Path src = TestUtils.testPathRoot(hboss);
+    final Path dest = TestUtils.testPath(hboss, "dest");
+    assertTrue(hboss.mkdirs(src));
+    // The src znode should exist after creating the dir in S3
+    validatePathInZk(lockManager.norm(src));
+    // The move should fail
+    assertFalse(hboss.rename(src, dest));
+    // We should have a znode for dest (we just locked it)
+    validatePathInZk(lockManager.norm(src));
+    // We should no longer have a znode for src (we effectively deleted it 
from S3)
+    validatePathInZk(lockManager.norm(dest));
+  }
+
+  @Test
+  public void testRenameDeeperToHigher() throws Exception {
+    // Rename src to dest and validate that the znode for src is cleaned up.
+    final Path src = TestUtils.testPath(hboss, "/a/b/1");
+    final Path dest = TestUtils.testPath(hboss, "/a/1");
+    assertTrue(hboss.mkdirs(src));
+    // The src znode should exist after creating the dir in S3
+    validatePathInZk(lockManager.norm(src));
+    // mv /a/b/1 /a/1
+    assertTrue(hboss.rename(src, dest));
+    // We should not have a znode for the src
+    validatePathNotInZk(lockManager.norm(src));
+    // We should have a lock for the dest
+    validatePathInZk(lockManager.norm(dest));
+  }
+
+  @Test
+  public void testRenameHigherToDeeper() throws Exception {
+    // Rename src to dest and validate that the znode for src is cleaned up.
+    final Path src = TestUtils.testPath(hboss, "/a/1");
+    final Path dest = TestUtils.testPath(hboss, "/a/b/1");
+    // `mkdir /a/1`
+    assertTrue(hboss.mkdirs(src));
+    // The src znode should exist after creating the dir in S3
+    validatePathInZk(lockManager.norm(src));
+    // `mkdir /a/b`
+    assertTrue(hboss.mkdirs(dest.getParent()));
+    // `mv /a/1 /a/b/1`
+    assertTrue(hboss.rename(src, dest));
+    // We should have a znode for dest (we just locked it)
+    validatePathNotInZk(lockManager.norm(src));
+    // We should no longer have a znode for src (we effectively deleted it 
from S3)
+    validatePathInZk(lockManager.norm(dest));
+  }
+
+  @Test
+  public void testDelete() throws Exception {
+    // Delete src and validate that the znode for src is cleaned up.
+    final Path src = TestUtils.testPath(hboss, "src");
+    assertTrue(hboss.mkdirs(src));
+    // The src znode should exist after creating the dir in S3
+    validatePathInZk(lockManager.norm(src));
+    // `mv src dest`
+    assertTrue(hboss.delete(src, true));
+    // We should no longer have a znode for src since we deleted it from S3
+    validatePathNotInZk(lockManager.norm(src));
+  }
+}
diff --git 
a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/sync/TestTreeLockManager.java
 
b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/sync/TestTreeLockManager.java
index e9ef0ce..7236da8 100644
--- 
a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/sync/TestTreeLockManager.java
+++ 
b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/sync/TestTreeLockManager.java
@@ -36,7 +36,7 @@ public class TestTreeLockManager extends 
HBaseObjectStoreSemanticsTest {
 
   @Test
   public void testLockBelowChecks() throws Exception {
-    Assume.assumeFalse(sync instanceof NullTreeLockManager);
+    Assume.assumeFalse(getLockManager() instanceof NullTreeLockManager);
 
     Path parent = testPath("testListingLevels");
     Path child = new Path(parent, "child");
diff --git a/hbase-oss/src/test/resources/log4j.properties 
b/hbase-oss/src/test/resources/log4j.properties
index a59c82e..108b3f9 100644
--- a/hbase-oss/src/test/resources/log4j.properties
+++ b/hbase-oss/src/test/resources/log4j.properties
@@ -17,3 +17,5 @@ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
 log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} 
(%F:%M(%L)) - %m%n
 log4j.logger.org.apache.hadoop=DEBUG
+log4j.logger.org.apache.hadoop.metrics2=WARN
+log4j.logger.org.apache.hadoop.fs=WARN
\ No newline at end of file

Reply via email to