This is an automated email from the ASF dual-hosted git repository. mmiller pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 76aaba0ec4 Improve the madness in the GC (#2716) 76aaba0ec4 is described below commit 76aaba0ec4e086ab535a1e88ba6ecb0f62481b2c Author: Mike Miller <mmil...@apache.org> AuthorDate: Thu May 19 11:02:52 2022 +0000 Improve the madness in the GC (#2716) * Add comments to makeRelative method and simplify * Create new emptyPathsTest in GarbageCollectionTest Co-authored-by: Christopher Tubbs <ctubb...@apache.org> --- .../accumulo/gc/GarbageCollectionAlgorithm.java | 53 ++++++++++++---------- .../apache/accumulo/gc/GarbageCollectionTest.java | 22 +++++++++ 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index 762c597675..b6e770001a 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@ -18,8 +18,10 @@ */ package org.apache.accumulo.gc; +import static java.util.Arrays.stream; +import static java.util.function.Predicate.not; + import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; @@ -52,59 +54,62 @@ public class GarbageCollectionAlgorithm { private static final Logger log = LoggerFactory.getLogger(GarbageCollectionAlgorithm.class); + /** + * This method takes a file or directory path and returns a relative path in 1 of 2 forms: + * + * <pre> + * 1- For files: table-id/tablet-directory/filename.rf + * 2- For directories: table-id/tablet-directory + * </pre> + * + * For example, for full file path like hdfs://foo:6000/accumulo/tables/4/t0/F000.rf it will + * return 4/t0/F000.rf. For a directory that already is relative, like 4/t0, it will just return + * the original path. This method will also remove prefixed relative paths like ../4/t0/F000.rf + * and return 4/t0/F000.rf. It also strips out empty tokens from paths like + * hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf returning 4/t0/F001.rf. + */ private String makeRelative(String path, int expectedLen) { String relPath = path; + // remove prefixed old relative path if (relPath.startsWith("../")) relPath = relPath.substring(3); + // remove trailing slash while (relPath.endsWith("/")) relPath = relPath.substring(0, relPath.length() - 1); + // remove beginning slash while (relPath.startsWith("/")) relPath = relPath.substring(1); - String[] tokens = relPath.split("/"); - - // handle paths like a//b///c - boolean containsEmpty = false; - for (String token : tokens) { - if (token.equals("")) { - containsEmpty = true; - break; - } - } - - if (containsEmpty) { - ArrayList<String> tmp = new ArrayList<>(); - for (String token : tokens) { - if (!token.equals("")) { - tmp.add(token); - } - } - - tokens = tmp.toArray(new String[tmp.size()]); - } + // Handle paths like a//b///c by dropping the empty tokens. + String[] tokens = stream(relPath.split("/")).filter(not(""::equals)).toArray(String[]::new); if (tokens.length > 3 && path.contains(":")) { + // full file path like hdfs://foo:6000/accumulo/tables/4/t0/F000.rf if (tokens[tokens.length - 4].equals(Constants.TABLE_DIR) && (expectedLen == 0 || expectedLen == 3)) { + // return the last 3 tokens after tables, like 4/t0/F000.rf relPath = tokens[tokens.length - 3] + "/" + tokens[tokens.length - 2] + "/" + tokens[tokens.length - 1]; } else if (tokens[tokens.length - 3].equals(Constants.TABLE_DIR) && (expectedLen == 0 || expectedLen == 2)) { + // return the last 2 tokens after tables, like 4/t0 relPath = tokens[tokens.length - 2] + "/" + tokens[tokens.length - 1]; } else { - throw new IllegalArgumentException(path); + throw new IllegalArgumentException("Failed to make path relative. Bad reference: " + path); } } else if (tokens.length == 3 && (expectedLen == 0 || expectedLen == 3) && !path.contains(":")) { + // we already have a relative path so return it, like 4/t0/F000.rf relPath = tokens[0] + "/" + tokens[1] + "/" + tokens[2]; } else if (tokens.length == 2 && (expectedLen == 0 || expectedLen == 2) && !path.contains(":")) { + // return the last 2 tokens of the relative path, like 4/t0 relPath = tokens[0] + "/" + tokens[1]; } else { - throw new IllegalArgumentException(path); + throw new IllegalArgumentException("Failed to make path relative. Bad reference: " + path); } log.trace("{} -> {} expectedLen = {}", path, relPath, expectedLen); diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 03a0a445ef..9c51fb1719 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@ -290,6 +290,28 @@ public class GarbageCollectionTest { "hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); } + /** + * Tests valid file paths that have empty tokens. + */ + @Test + public void emptyPathsTest() throws Exception { + TestGCE gce = new TestGCE(); + + gce.candidates.add("hdfs://foo:6000/accumulo/tables/4//t0//F000.rf"); + gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf"); + gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf"); + gce.candidates.add("hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf"); + + gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4//t0//F000.rf"); + gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf"); + gce.addFileReference("6", null, "hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf"); + + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + gca.collect(gce); + + assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf"); + } + @Test public void testRelative() throws Exception { TestGCE gce = new TestGCE();