This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 76aaba0ec4 Improve the madness in the GC (#2716)
76aaba0ec4 is described below

commit 76aaba0ec4e086ab535a1e88ba6ecb0f62481b2c
Author: Mike Miller <mmil...@apache.org>
AuthorDate: Thu May 19 11:02:52 2022 +0000

    Improve the madness in the GC (#2716)
    
    * Add comments to makeRelative method and simplify
    * Create new emptyPathsTest in GarbageCollectionTest
    
    Co-authored-by: Christopher Tubbs <ctubb...@apache.org>
---
 .../accumulo/gc/GarbageCollectionAlgorithm.java    | 53 ++++++++++++----------
 .../apache/accumulo/gc/GarbageCollectionTest.java  | 22 +++++++++
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
index 762c597675..b6e770001a 100644
--- 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
+++ 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
@@ -18,8 +18,10 @@
  */
 package org.apache.accumulo.gc;
 
+import static java.util.Arrays.stream;
+import static java.util.function.Predicate.not;
+
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -52,59 +54,62 @@ public class GarbageCollectionAlgorithm {
 
   private static final Logger log = 
LoggerFactory.getLogger(GarbageCollectionAlgorithm.class);
 
+  /**
+   * This method takes a file or directory path and returns a relative path in 
1 of 2 forms:
+   *
+   * <pre>
+   *      1- For files: table-id/tablet-directory/filename.rf
+   *      2- For directories: table-id/tablet-directory
+   * </pre>
+   *
+   * For example, for full file path like 
hdfs://foo:6000/accumulo/tables/4/t0/F000.rf it will
+   * return 4/t0/F000.rf. For a directory that already is relative, like 4/t0, 
it will just return
+   * the original path. This method will also remove prefixed relative paths 
like ../4/t0/F000.rf
+   * and return 4/t0/F000.rf. It also strips out empty tokens from paths like
+   * hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf returning 4/t0/F001.rf.
+   */
   private String makeRelative(String path, int expectedLen) {
     String relPath = path;
 
+    // remove prefixed old relative path
     if (relPath.startsWith("../"))
       relPath = relPath.substring(3);
 
+    // remove trailing slash
     while (relPath.endsWith("/"))
       relPath = relPath.substring(0, relPath.length() - 1);
 
+    // remove beginning slash
     while (relPath.startsWith("/"))
       relPath = relPath.substring(1);
 
-    String[] tokens = relPath.split("/");
-
-    // handle paths like a//b///c
-    boolean containsEmpty = false;
-    for (String token : tokens) {
-      if (token.equals("")) {
-        containsEmpty = true;
-        break;
-      }
-    }
-
-    if (containsEmpty) {
-      ArrayList<String> tmp = new ArrayList<>();
-      for (String token : tokens) {
-        if (!token.equals("")) {
-          tmp.add(token);
-        }
-      }
-
-      tokens = tmp.toArray(new String[tmp.size()]);
-    }
+    // Handle paths like a//b///c by dropping the empty tokens.
+    String[] tokens = 
stream(relPath.split("/")).filter(not(""::equals)).toArray(String[]::new);
 
     if (tokens.length > 3 && path.contains(":")) {
+      // full file path like hdfs://foo:6000/accumulo/tables/4/t0/F000.rf
       if (tokens[tokens.length - 4].equals(Constants.TABLE_DIR)
           && (expectedLen == 0 || expectedLen == 3)) {
+        // return the last 3 tokens after tables, like 4/t0/F000.rf
         relPath = tokens[tokens.length - 3] + "/" + tokens[tokens.length - 2] 
+ "/"
             + tokens[tokens.length - 1];
       } else if (tokens[tokens.length - 3].equals(Constants.TABLE_DIR)
           && (expectedLen == 0 || expectedLen == 2)) {
+        // return the last 2 tokens after tables, like 4/t0
         relPath = tokens[tokens.length - 2] + "/" + tokens[tokens.length - 1];
       } else {
-        throw new IllegalArgumentException(path);
+        throw new IllegalArgumentException("Failed to make path relative. Bad 
reference: " + path);
       }
     } else if (tokens.length == 3 && (expectedLen == 0 || expectedLen == 3)
         && !path.contains(":")) {
+      // we already have a relative path so return it, like 4/t0/F000.rf
       relPath = tokens[0] + "/" + tokens[1] + "/" + tokens[2];
     } else if (tokens.length == 2 && (expectedLen == 0 || expectedLen == 2)
         && !path.contains(":")) {
+      // return the last 2 tokens of the relative path, like 4/t0
       relPath = tokens[0] + "/" + tokens[1];
     } else {
-      throw new IllegalArgumentException(path);
+      throw new IllegalArgumentException("Failed to make path relative. Bad 
reference: " + path);
     }
 
     log.trace("{} -> {} expectedLen = {}", path, relPath, expectedLen);
diff --git 
a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java 
b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
index 03a0a445ef..9c51fb1719 100644
--- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
+++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
@@ -290,6 +290,28 @@ public class GarbageCollectionTest {
         "hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
   }
 
+  /**
+   * Tests valid file paths that have empty tokens.
+   */
+  @Test
+  public void emptyPathsTest() throws Exception {
+    TestGCE gce = new TestGCE();
+
+    gce.candidates.add("hdfs://foo:6000/accumulo/tables/4//t0//F000.rf");
+    gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf");
+    gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf");
+    gce.candidates.add("hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf");
+
+    gce.addFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4//t0//F000.rf");
+    gce.addFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf");
+    gce.addFileReference("6", null, 
"hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf");
+
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+    gca.collect(gce);
+
+    assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf");
+  }
+
   @Test
   public void testRelative() throws Exception {
     TestGCE gce = new TestGCE();

Reply via email to