This is an automated email from the ASF dual-hosted git repository. mmiller pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new afe78566f8 Create Reference interface for new GC classes (#2767) afe78566f8 is described below commit afe78566f85f9e9fd5b43165305bc70cd3a9c878 Author: Mike Miller <mmil...@apache.org> AuthorDate: Thu Jun 23 21:59:43 2022 +0000 Create Reference interface for new GC classes (#2767) * Create ReferenceFile class to implement Reference interface * Make ReferenceDirectory extend ReferenceFile * Create AllVolumesDiretory class to extend ReferenceFile and move GcVolumeUtil method to class * Comment and clean up GC code * Update MetadataSchema.isValidDirCol regex to be more strict * Make TableGroupWatcher use ReferenceFile for calls to GC * Updates to various relevant tests --- .../core/client/rfile/RFileWriterBuilder.java | 7 +-- .../org/apache/accumulo/core/gc/Reference.java | 61 +++++++--------------- .../accumulo/core/gc/ReferenceDirectory.java | 29 ++++++++-- .../core/gc/{Reference.java => ReferenceFile.java} | 41 ++++++++++----- .../apache/accumulo/core/metadata/TabletFile.java | 8 ++- .../accumulo/core/metadata/ValidationUtil.java | 24 +++++++-- .../accumulo/core/metadata/schema/Ample.java | 14 ++--- .../core/metadata/schema/MetadataSchema.java | 4 +- .../accumulo/server/gc/AllVolumesDirectory.java | 53 +++++++++++++++++++ .../apache/accumulo/server/gc/GcVolumeUtil.java | 13 +---- .../accumulo/server/metadata/ServerAmpleImpl.java | 12 ++--- .../accumulo/server/util/MetadataTableUtil.java | 9 ++-- .../main/java/org/apache/accumulo/gc/GCRun.java | 22 +++++--- .../accumulo/gc/GarbageCollectionAlgorithm.java | 16 +++--- .../accumulo/gc/GarbageCollectionEnvironment.java | 7 +-- .../apache/accumulo/gc/GarbageCollectionTest.java | 4 +- .../accumulo/gc/SimpleGarbageCollectorTest.java | 18 +++---- .../accumulo/manager/TabletGroupWatcher.java | 19 ++++--- .../tableOps/bulkVer1/CleanUpBulkImport.java | 4 +- .../tableOps/bulkVer2/CleanUpBulkImport.java | 4 +- .../accumulo/manager/upgrade/Upgrader9to10.java | 13 ++--- .../manager/upgrade/Upgrader9to10Test.java | 32 ++++++------ .../java/org/apache/accumulo/test/CloneIT.java | 52 +++++++++--------- .../test/functional/GarbageCollectorIT.java | 28 +++++++--- .../test/upgrade/GCUpgrade9to10TestIT.java | 5 +- 25 files changed, 298 insertions(+), 201 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java index f26348d3b5..b2bd41165e 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java +++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java @@ -39,6 +39,7 @@ import org.apache.accumulo.core.conf.DefaultConfiguration; import org.apache.accumulo.core.crypto.CryptoServiceFactory; import org.apache.accumulo.core.crypto.CryptoServiceFactory.ClassloaderType; import org.apache.accumulo.core.file.FileOperations; +import org.apache.accumulo.core.metadata.ValidationUtil; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import org.apache.accumulo.core.spi.crypto.CryptoService; import org.apache.hadoop.fs.FSDataOutputStream; @@ -131,11 +132,7 @@ class RFileWriterBuilder implements RFile.OutputArguments, RFile.WriterFSOptions @Override public WriterFSOptions to(String filename) { - Objects.requireNonNull(filename); - if (!filename.endsWith(".rf")) { - throw new IllegalArgumentException( - "Provided filename (" + filename + ") does not end with '.rf'"); - } + ValidationUtil.validateRFileName(filename); this.out = new OutputArgs(filename); return this; } diff --git a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java index f40424c62f..cdffbd7eeb 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java @@ -19,50 +19,29 @@ package org.apache.accumulo.core.gc; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.metadata.schema.MetadataSchema; /** - * A GC reference to a tablet file or directory. + * A GC reference used for collecting files and directories into a single stream. The GC deals with + * two inputs conceptually: candidates and references. Candidates are files that could be possibly + * be deleted if they are not defeated by a reference. */ -public class Reference implements Comparable<Reference> { - // parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003" - public final TableId tableId; // 2a +public interface Reference { + /** + * Only return true if the reference is a directory. + */ + boolean isDirectory(); - // the exact string that is stored in the metadata - public final String metadataEntry; + /** + * Get the {@link TableId} of the reference. + */ + TableId getTableId(); - public Reference(TableId tableId, String metadataEntry) { - MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(tableId.canonical()); - this.tableId = tableId; - this.metadataEntry = metadataEntry; - } - - @Override - public int compareTo(Reference that) { - if (equals(that)) { - return 0; - } else { - return this.metadataEntry.compareTo(that.metadataEntry); - } - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - Reference other = (Reference) obj; - if (metadataEntry == null) { - return other.metadataEntry == null; - } else - return metadataEntry.equals(other.metadataEntry); - } - - @Override - public int hashCode() { - return this.metadataEntry.hashCode(); - } + /** + * Get the exact string stored in the metadata table for this file or directory. A file will be + * read from the Tablet "file" column family: + * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily} + * A directory will be read from the "srv:dir" column family: + * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily} + */ + String getMetadataEntry(); } diff --git a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java index f2d6e97e94..b9a6589d9f 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java @@ -19,15 +19,38 @@ package org.apache.accumulo.core.gc; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.metadata.schema.MetadataSchema; /** - * Part of the Tablet File path that is definitely a directory. + * A GC reference to a Tablet directory, like t-0003. */ -public class ReferenceDirectory extends Reference { - public final String tabletDir; // t-0003 +public class ReferenceDirectory extends ReferenceFile { + private final String tabletDir; // t-0003 public ReferenceDirectory(TableId tableId, String dirName) { super(tableId, dirName); + MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName); this.tabletDir = dirName; } + + @Override + public boolean isDirectory() { + return true; + } + + public String getTabletDir() { + return tabletDir; + } + + /** + * A Tablet directory should have a metadata entry equal to the dirName. + */ + @Override + public String getMetadataEntry() { + if (!tabletDir.equals(metadataEntry)) { + throw new IllegalStateException( + "Tablet dir " + tabletDir + " is not equal to metadataEntry: " + metadataEntry); + } + return metadataEntry; + } } diff --git a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java similarity index 65% copy from core/src/main/java/org/apache/accumulo/core/gc/Reference.java copy to core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java index f40424c62f..78c546a6b9 100644 --- a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java @@ -18,27 +18,43 @@ */ package org.apache.accumulo.core.gc; +import java.util.Objects; + import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.metadata.schema.MetadataSchema; /** - * A GC reference to a tablet file or directory. + * A GC reference used for streaming and delete markers. This type is a file. Subclass is a + * directory. */ -public class Reference implements Comparable<Reference> { +public class ReferenceFile implements Reference, Comparable<ReferenceFile> { // parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003" public final TableId tableId; // 2a // the exact string that is stored in the metadata - public final String metadataEntry; + protected final String metadataEntry; + + public ReferenceFile(TableId tableId, String metadataEntry) { + this.tableId = Objects.requireNonNull(tableId); + this.metadataEntry = Objects.requireNonNull(metadataEntry); + } + + @Override + public boolean isDirectory() { + return false; + } - public Reference(TableId tableId, String metadataEntry) { - MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(tableId.canonical()); - this.tableId = tableId; - this.metadataEntry = metadataEntry; + @Override + public TableId getTableId() { + return tableId; + } + + @Override + public String getMetadataEntry() { + return metadataEntry; } @Override - public int compareTo(Reference that) { + public int compareTo(ReferenceFile that) { if (equals(that)) { return 0; } else { @@ -54,11 +70,8 @@ public class Reference implements Comparable<Reference> { return false; if (getClass() != obj.getClass()) return false; - Reference other = (Reference) obj; - if (metadataEntry == null) { - return other.metadataEntry == null; - } else - return metadataEntry.equals(other.metadataEntry); + ReferenceFile other = (ReferenceFile) obj; + return metadataEntry.equals(other.metadataEntry); } @Override diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java b/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java index 0127c3eb6a..c7e4a22926 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/TabletFile.java @@ -23,9 +23,10 @@ import static org.apache.accumulo.core.Constants.HDFS_TABLES_DIR; import java.util.Objects; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; @@ -46,6 +47,8 @@ public class TabletFile implements Comparable<TabletFile> { protected final Path metaPath; private final String normalizedPath; + private static final Logger log = LoggerFactory.getLogger(TabletFile.class); + /** * Construct new tablet file using a Path. Used in the case where we had to use Path object to * qualify an absolute path or create a new file. @@ -53,10 +56,11 @@ public class TabletFile implements Comparable<TabletFile> { public TabletFile(Path metaPath) { this.metaPath = Objects.requireNonNull(metaPath); String errorMsg = "Missing or invalid part of tablet file metadata entry: " + metaPath; + log.debug("Parsing TabletFile from {}", metaPath); // use Path object to step backwards from the filename through all the parts this.fileName = metaPath.getName(); - ServerColumnFamily.validateDirCol(fileName); + ValidationUtil.validateFileName(fileName); Path tabletDirPath = Objects.requireNonNull(metaPath.getParent(), errorMsg); diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/ValidationUtil.java b/core/src/main/java/org/apache/accumulo/core/metadata/ValidationUtil.java index 39c171d55e..7b8ba3ba01 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/ValidationUtil.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/ValidationUtil.java @@ -18,7 +18,9 @@ */ package org.apache.accumulo.core.metadata; -import org.apache.accumulo.core.gc.Reference; +import java.util.Objects; + +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.hadoop.fs.Path; /** @@ -37,8 +39,8 @@ public class ValidationUtil { return validate(p).toString(); } - public static Reference validate(Reference reference) { - validate(new Path(reference.metadataEntry)); + public static ReferenceFile validate(ReferenceFile reference) { + validate(new Path(reference.getMetadataEntry())); return reference; } @@ -48,4 +50,20 @@ public class ValidationUtil { } return path; } + + public static void validateRFileName(String fileName) { + Objects.requireNonNull(fileName); + if (!fileName.endsWith(".rf") && !fileName.endsWith("_tmp")) { + throw new IllegalArgumentException( + "Provided filename (" + fileName + ") does not end with '.rf' or '_tmp'"); + } + } + + public static void validateFileName(String fileName) { + Objects.requireNonNull(fileName); + if (!fileName.matches("[\\dA-Za-z._-]+")) { + throw new IllegalArgumentException( + "Provided filename (" + fileName + ") contains invalid characters."); + } + } } diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java index 25bf516026..251f6e6a3a 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java @@ -25,7 +25,7 @@ import java.util.stream.Stream; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.StoredTabletFile; @@ -191,7 +191,7 @@ public interface Ample { /** * Unlike {@link #putGcCandidates(TableId, Collection)} this takes file and dir GC candidates. */ - default void putGcFileAndDirCandidates(TableId tableId, Collection<Reference> candidates) { + default void putGcFileAndDirCandidates(TableId tableId, Collection<ReferenceFile> candidates) { throw new UnsupportedOperationException(); } @@ -218,16 +218,16 @@ public interface Ample { } /** - * Return an encoded delete marker Mutation to delete the specified TabletFile path. A Reference - * is used for the parameter because the Garbage Collector is optimized to store a directory for - * Tablet File. Otherwise, a {@link TabletFile} object could be used. The tabletFilePathToRemove - * is validated and normalized before creating the mutation. + * Return an encoded delete marker Mutation to delete the specified TabletFile path. A + * ReferenceFile is used for the parameter because the Garbage Collector is optimized to store a + * directory for Tablet File. Otherwise, a {@link TabletFile} object could be used. The + * tabletFilePathToRemove is validated and normalized before creating the mutation. * * @param tabletFilePathToRemove * String full path of the TabletFile * @return Mutation with encoded delete marker */ - default Mutation createDeleteMutation(Reference tabletFilePathToRemove) { + default Mutation createDeleteMutation(ReferenceFile tabletFilePathToRemove) { throw new UnsupportedOperationException(); } diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java index 707a46942c..26679caf1f 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java @@ -192,11 +192,13 @@ public class MetadataSchema { public static final String DEFAULT_TABLET_DIR_NAME = "default_tablet"; /** + * Matches regex for a tablet directory like "default_tablet" or "t-000009x" + * * @return true if dirName is a valid value for the {@link #DIRECTORY_COLUMN} in the metadata * table. Returns false otherwise. */ public static boolean isValidDirCol(String dirName) { - return !dirName.contains("/"); + return dirName.matches("[\\dA-Za-z_-]+"); } /** diff --git a/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java new file mode 100644 index 0000000000..2dbc1705f3 --- /dev/null +++ b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.gc; + +import static org.apache.accumulo.server.gc.GcVolumeUtil.ALL_VOLUMES_PREFIX; + +import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.gc.ReferenceFile; +import org.apache.accumulo.core.metadata.schema.MetadataSchema; +import org.apache.hadoop.fs.Path; + +/** + * A specially encoded GC Reference to a directory with the {@link GcVolumeUtil#ALL_VOLUMES_PREFIX} + */ +public class AllVolumesDirectory extends ReferenceFile { + + public AllVolumesDirectory(TableId tableId, String dirName) { + super(tableId, getDeleteTabletOnAllVolumesUri(tableId, dirName)); + } + + private static String getDeleteTabletOnAllVolumesUri(TableId tableId, String dirName) { + MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName); + return ALL_VOLUMES_PREFIX + Constants.TABLE_DIR + Path.SEPARATOR + tableId + Path.SEPARATOR + + dirName; + } + + @Override + public String getMetadataEntry() { + return metadataEntry; + } + + @Override + public boolean isDirectory() { + return true; + } +} diff --git a/server/base/src/main/java/org/apache/accumulo/server/gc/GcVolumeUtil.java b/server/base/src/main/java/org/apache/accumulo/server/gc/GcVolumeUtil.java index 75146de922..3f73b85ef4 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/gc/GcVolumeUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/gc/GcVolumeUtil.java @@ -22,23 +22,12 @@ import java.util.Collection; import java.util.Collections; import java.util.stream.Collectors; -import org.apache.accumulo.core.Constants; -import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.gc.Reference; -import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.hadoop.fs.Path; public class GcVolumeUtil { // AGCAV : Accumulo Garbage Collector All Volumes - private static final String ALL_VOLUMES_PREFIX = "agcav:/"; - - public static Reference getDeleteTabletOnAllVolumesUri(TableId tableId, String dirName) { - ServerColumnFamily.validateDirCol(dirName); - String str = ALL_VOLUMES_PREFIX + Constants.TABLE_DIR + Path.SEPARATOR + tableId - + Path.SEPARATOR + dirName; - return new Reference(tableId, str); - } + static final String ALL_VOLUMES_PREFIX = "agcav:/"; public static Collection<Path> expandAllVolumesUri(VolumeManager fs, Path path) { if (path.toString().startsWith(ALL_VOLUMES_PREFIX)) { diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java index 2f357033af..2f1364358c 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java +++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java @@ -35,7 +35,7 @@ import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.StoredTabletFile; @@ -121,13 +121,13 @@ public class ServerAmpleImpl extends AmpleImpl implements Ample { } @Override - public void putGcFileAndDirCandidates(TableId tableId, Collection<Reference> candidates) { + public void putGcFileAndDirCandidates(TableId tableId, Collection<ReferenceFile> candidates) { if (RootTable.ID.equals(tableId)) { // Directories are unexpected for the root tablet, so convert to stored tablet file - mutateRootGcCandidates(rgcc -> rgcc.add( - candidates.stream().map(reference -> new StoredTabletFile(reference.metadataEntry)))); + mutateRootGcCandidates(rgcc -> rgcc.add(candidates.stream() + .map(reference -> new StoredTabletFile(reference.getMetadataEntry())))); return; } @@ -204,8 +204,8 @@ public class ServerAmpleImpl extends AmpleImpl implements Ample { } @Override - public Mutation createDeleteMutation(Reference tabletFilePathToRemove) { - return createDelMutation(ValidationUtil.validate(tabletFilePathToRemove).metadataEntry); + public Mutation createDeleteMutation(ReferenceFile tabletFilePathToRemove) { + return createDelMutation(ValidationUtil.validate(tabletFilePathToRemove).getMetadataEntry()); } public Mutation createDeleteMutation(StoredTabletFile pathToRemove) { diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index 598437527a..03a0478701 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -62,7 +62,7 @@ import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.StoredTabletFile; @@ -95,7 +95,7 @@ import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.fate.FateTxId; import org.apache.accumulo.fate.zookeeper.ServiceLock; import org.apache.accumulo.server.ServerContext; -import org.apache.accumulo.server.gc.GcVolumeUtil; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -374,12 +374,11 @@ public class MetadataTableUtil { if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) { StoredTabletFile stf = new StoredTabletFile(key.getColumnQualifierData().toString()); bw.addMutation( - ample.createDeleteMutation(new Reference(tableId, stf.getMetaUpdateDelete()))); + ample.createDeleteMutation(new ReferenceFile(tableId, stf.getMetaUpdateDelete()))); } if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { - var uri = - GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId, cell.getValue().toString()); + var uri = new AllVolumesDirectory(tableId, cell.getValue().toString()); bw.addMutation(ample.createDeleteMutation(uri)); } } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java index 14a5505d93..c384830fb0 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java @@ -47,6 +47,7 @@ import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.gc.Reference; import org.apache.accumulo.core.gc.ReferenceDirectory; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.ValidationUtil; @@ -139,21 +140,28 @@ public class GCRun implements GarbageCollectionEnvironment { public Stream<Reference> getReferences() { Stream<TabletMetadata> tabletStream; + // create a stream of metadata entries read from file, scan and tablet dir columns if (level == Ample.DataLevel.ROOT) { tabletStream = Stream.of(context.getAmple().readTablet(RootTable.EXTENT, DIR, FILES, SCANS)); } else { - tabletStream = TabletsMetadata.builder(context).scanTable(level.metaTable()) - .checkConsistency().fetch(DIR, FILES, SCANS).build().stream(); + var tabletsMetadata = TabletsMetadata.builder(context).scanTable(level.metaTable()) + .checkConsistency().fetch(DIR, FILES, SCANS).build(); + tabletStream = tabletsMetadata.stream(); } + // there is a lot going on in this "one line" so see below for more info return tabletStream.flatMap(tm -> { - Stream<Reference> refs = Stream.concat(tm.getFiles().stream(), tm.getScans().stream()) - .map(f -> new Reference(tm.getTableId(), f.getMetaUpdateDelete())); + // combine all the entries read from file and scan columns in the metadata table + var fileStream = Stream.concat(tm.getFiles().stream(), tm.getScans().stream()); + // map the files to Reference objects + var stream = fileStream.map(f -> new ReferenceFile(tm.getTableId(), f.getMetaUpdateDelete())); + // if dirName is populated then we have a tablet directory aka srv:dir if (tm.getDirName() != null) { - refs = Stream.concat(refs, - Stream.of(new ReferenceDirectory(tm.getTableId(), tm.getDirName()))); + // add the tablet directory to the stream + var tabletDir = new ReferenceDirectory(tm.getTableId(), tm.getDirName()); + stream = Stream.concat(stream, Stream.of(tabletDir)); } - return refs; + return stream; }); } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index bbe123a4f6..e91f47a682 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@ -135,25 +135,27 @@ public class GarbageCollectionAlgorithm { private void removeCandidatesInUse(GarbageCollectionEnvironment gce, SortedMap<String,String> candidateMap) { - Iterator<Reference> iter = gce.getReferences().iterator(); + var refStream = gce.getReferences(); + Iterator<Reference> iter = refStream.iterator(); while (iter.hasNext()) { Reference ref = iter.next(); - if (ref instanceof ReferenceDirectory) { + if (ref.isDirectory()) { var dirReference = (ReferenceDirectory) ref; - ServerColumnFamily.validateDirCol(dirReference.tabletDir); + ServerColumnFamily.validateDirCol(dirReference.getTabletDir()); - String dir = "/" + dirReference.tableId + "/" + dirReference.tabletDir; + String dir = "/" + dirReference.tableId + "/" + dirReference.getTabletDir(); dir = makeRelative(dir, 2); if (candidateMap.remove(dir) != null) log.debug("Candidate was still in use: {}", dir); } else { - String reference = ref.metadataEntry; + String reference = ref.getMetadataEntry(); if (reference.startsWith("/")) { - log.debug("Candidate {} has a relative path, prepend tableId {}", reference, ref.tableId); - reference = "/" + ref.tableId + ref.metadataEntry; + log.debug("Candidate {} has a relative path, prepend tableId {}", reference, + ref.getTableId()); + reference = "/" + ref.getTableId() + ref.getMetadataEntry(); } else if (!reference.contains(":") && !reference.startsWith("../")) { throw new RuntimeException("Bad file reference " + reference); } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java index c73ecb4e2b..a754747cce 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java @@ -28,9 +28,7 @@ import java.util.SortedMap; import java.util.stream.Stream; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.gc.Reference; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; @@ -68,10 +66,9 @@ public interface GarbageCollectionEnvironment { /** * Fetches the references to files, {@link DataFileColumnFamily#NAME} or - * {@link ScanFileColumnFamily#NAME}, from tablets + * {@link ScanFileColumnFamily#NAME}, from tablets and tablet directories. * - * @return An {@link Iterator} of {@link Entry}<{@link Key}, {@link Value}> which constitute - * a reference to a file. + * @return An {@link Stream} of {@link Reference} objects, that will need to be closed. */ Stream<Reference> getReferences(); diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 59fb2bea02..47db92398f 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@ -39,6 +39,7 @@ import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.gc.Reference; import org.apache.accumulo.core.gc.ReferenceDirectory; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.server.replication.proto.Replication.Status; import org.junit.jupiter.api.Test; @@ -95,7 +96,8 @@ public class GarbageCollectionTest { } public void addFileReference(String tableId, String endRow, String file) { - references.put(tableId + ":" + endRow + ":" + file, new Reference(TableId.of(tableId), file)); + references.put(tableId + ":" + endRow + ":" + file, + new ReferenceFile(TableId.of(tableId), file)); } public void removeFileReference(String tableId, String endRow, String file) { diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java index f6d28b2038..01e26db7eb 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java @@ -48,7 +48,7 @@ import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.volume.Volume; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.fs.VolumeManager; -import org.apache.accumulo.server.gc.GcVolumeUtil; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.accumulo.server.security.SystemCredentials; import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.BeforeEach; @@ -171,13 +171,13 @@ public class SimpleGarbageCollectorTest { confirmed.put("5a/t-0001/F0001.rf", "hdfs://nn1/accumulo/tables/5a/t-0001/F0001.rf"); confirmed.put("5a/t-0001/F0002.rf", "hdfs://nn1/accumulo/tables/5a/t-0001/F0002.rf"); confirmed.put("5a/t-0002/F0001.rf", "hdfs://nn1/accumulo/tables/5a/t-0002/F0001.rf"); - var uri = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(TableId.of("5b"), "t-0003"); - confirmed.put("5b/t-0003", uri.metadataEntry); + var allVolumesDirectory = new AllVolumesDirectory(TableId.of("5b"), "t-0003"); + confirmed.put("5b/t-0003", allVolumesDirectory.getMetadataEntry()); confirmed.put("5b/t-0003/F0001.rf", "hdfs://nn1/accumulo/tables/5b/t-0003/F0001.rf"); confirmed.put("5b/t-0003/F0002.rf", "hdfs://nn2/accumulo/tables/5b/t-0003/F0002.rf"); confirmed.put("5b/t-0003/F0003.rf", "hdfs://nn3/accumulo/tables/5b/t-0003/F0003.rf"); - uri = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(TableId.of("5b"), "t-0004"); - confirmed.put("5b/t-0004", uri.metadataEntry); + allVolumesDirectory = new AllVolumesDirectory(TableId.of("5b"), "t-0004"); + confirmed.put("5b/t-0004", allVolumesDirectory.getMetadataEntry()); confirmed.put("5b/t-0004/F0001.rf", "hdfs://nn1/accumulo/tables/5b/t-0004/F0001.rf"); List<String> processedDeletes = new ArrayList<>(); @@ -187,11 +187,11 @@ public class SimpleGarbageCollectorTest { TreeMap<String,String> expected = new TreeMap<>(); expected.put("5a/t-0001", "hdfs://nn1/accumulo/tables/5a/t-0001"); expected.put("5a/t-0002/F0001.rf", "hdfs://nn1/accumulo/tables/5a/t-0002/F0001.rf"); - uri = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(TableId.of("5b"), "t-0003"); - expected.put("5b/t-0003", uri.metadataEntry); + allVolumesDirectory = new AllVolumesDirectory(TableId.of("5b"), "t-0003"); + expected.put("5b/t-0003", allVolumesDirectory.getMetadataEntry()); expected.put("5b/t-0003/F0003.rf", "hdfs://nn3/accumulo/tables/5b/t-0003/F0003.rf"); - uri = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(TableId.of("5b"), "t-0004"); - expected.put("5b/t-0004", uri.metadataEntry); + allVolumesDirectory = new AllVolumesDirectory(TableId.of("5b"), "t-0004"); + expected.put("5b/t-0004", allVolumesDirectory.getMetadataEntry()); assertEquals(expected, confirmed); assertEquals(Arrays.asList("hdfs://nn1/accumulo/tables/5a/t-0001/F0001.rf", diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 7047988144..03e3d1ed47 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -52,7 +52,7 @@ import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.logging.TabletLogger; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.manager.thrift.ManagerState; @@ -83,7 +83,7 @@ import org.apache.accumulo.manager.state.TableCounts; import org.apache.accumulo.manager.state.TableStats; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.conf.TableConfiguration; -import org.apache.accumulo.server.gc.GcVolumeUtil; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.accumulo.server.log.WalStateManager; import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; @@ -636,12 +636,12 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { ServerColumnFamily.TIME_COLUMN.fetch(scanner); scanner.fetchColumnFamily(DataFileColumnFamily.NAME); scanner.fetchColumnFamily(CurrentLocationColumnFamily.NAME); - Set<Reference> datafilesAndDirs = new TreeSet<>(); + Set<ReferenceFile> datafilesAndDirs = new TreeSet<>(); for (Entry<Key,Value> entry : scanner) { Key key = entry.getKey(); if (key.compareColumnFamily(DataFileColumnFamily.NAME) == 0) { var stf = new StoredTabletFile(key.getColumnQualifierData().toString()); - datafilesAndDirs.add(new Reference(stf.getTableId(), stf.getMetaUpdateDelete())); + datafilesAndDirs.add(new ReferenceFile(stf.getTableId(), stf.getMetaUpdateDelete())); if (datafilesAndDirs.size() > 1000) { ample.putGcFileAndDirCandidates(extent.tableId(), datafilesAndDirs); datafilesAndDirs.clear(); @@ -652,9 +652,9 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { throw new IllegalStateException( "Tablet " + key.getRow() + " is assigned during a merge!"); } else if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { - Reference path = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(extent.tableId(), - entry.getValue().toString()); - datafilesAndDirs.add(path); + var allVolumesDirectory = + new AllVolumesDirectory(extent.tableId(), entry.getValue().toString()); + datafilesAndDirs.add(allVolumesDirectory); if (datafilesAndDirs.size() > 1000) { ample.putGcFileAndDirCandidates(extent.tableId(), datafilesAndDirs); datafilesAndDirs.clear(); @@ -738,9 +738,8 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { maxLogicalTime = TabletTime.maxMetadataTime(maxLogicalTime, MetadataTime.parse(value.toString())); } else if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { - Reference uri = - GcVolumeUtil.getDeleteTabletOnAllVolumesUri(range.tableId(), value.toString()); - bw.addMutation(manager.getContext().getAmple().createDeleteMutation(uri)); + var allVolumesDir = new AllVolumesDirectory(range.tableId(), value.toString()); + bw.addMutation(manager.getContext().getAmple().createDeleteMutation(allVolumesDir)); } } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java index 8b4db0afe5..31534593fe 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java @@ -23,7 +23,7 @@ import java.util.Collections; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.master.thrift.BulkImportState; import org.apache.accumulo.fate.FateTxId; import org.apache.accumulo.fate.Repo; @@ -62,7 +62,7 @@ public class CleanUpBulkImport extends ManagerRepo { MetadataTableUtil.removeBulkLoadInProgressFlag(manager.getContext(), "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); manager.getContext().getAmple().putGcFileAndDirCandidates(tableId, - Collections.singleton(new Reference(tableId, bulkDir.toString()))); + Collections.singleton(new ReferenceFile(tableId, bulkDir.toString()))); log.debug("removing the metadata table markers for loaded files"); AccumuloClient client = manager.getContext(); MetadataTableUtil.removeBulkLoadEntries(client, tableId, tid); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java index 358b1b3d54..2896077117 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java @@ -23,7 +23,7 @@ import java.util.Collections; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.master.thrift.BulkImportState; import org.apache.accumulo.fate.FateTxId; @@ -57,7 +57,7 @@ public class CleanUpBulkImport extends ManagerRepo { MetadataTableUtil.removeBulkLoadInProgressFlag(manager.getContext(), "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); manager.getContext().getAmple().putGcFileAndDirCandidates(info.tableId, - Collections.singleton(new Reference(info.tableId, bulkDir.toString()))); + Collections.singleton(new ReferenceFile(info.tableId, bulkDir.toString()))); if (info.tableState == TableState.ONLINE) { log.debug("removing the metadata table markers for loaded files"); AccumuloClient client = manager.getContext(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java index 541ee68489..79846de5c9 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java @@ -52,7 +52,7 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.file.FileSKVIterator; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.TServerInstance; @@ -75,6 +75,7 @@ import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.conf.util.ConfigPropertyUpgrader; import org.apache.accumulo.server.fs.VolumeManager; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.accumulo.server.gc.GcVolumeUtil; import org.apache.accumulo.server.metadata.RootGcCandidates; import org.apache.accumulo.server.metadata.TabletMutatorBase; @@ -482,7 +483,7 @@ public class Upgrader9to10 implements Upgrader { log.trace("upgrading delete entry for {}", olddelete); Path absolutePath = resolveRelativeDelete(olddelete, upgradeProp); - Reference updatedDel = switchToAllVolumes(absolutePath); + ReferenceFile updatedDel = switchToAllVolumes(absolutePath); writer.addMutation(ample.createDeleteMutation(updatedDel)); } @@ -508,7 +509,7 @@ public class Upgrader9to10 implements Upgrader { * "tables/5a/t-0005/A0012.rf" depth = 4 will be returned as is. */ @VisibleForTesting - static Reference switchToAllVolumes(Path olddelete) { + static ReferenceFile switchToAllVolumes(Path olddelete) { Path pathNoVolume = Objects.requireNonNull(VolumeManager.FileType.TABLE.removeVolume(olddelete), "Invalid delete marker. No volume in path: " + olddelete); @@ -518,16 +519,16 @@ public class Upgrader9to10 implements Upgrader { var tableId = TableId.of(pathNoVolume.getParent().getName()); // except bulk directories don't get an all volume prefix if (pathNoVolume.getName().startsWith(Constants.BULK_PREFIX)) { - return new Reference(tableId, olddelete.toString()); + return new ReferenceFile(tableId, olddelete.toString()); } else { - return GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId, tabletDir); + return new AllVolumesDirectory(tableId, tabletDir); } } else { // depth of 4 should be a file like, "tables/5a/t-0005/A0012.rf" if (pathNoVolume.depth() == 4) { Path tabletDirPath = pathNoVolume.getParent(); var tableId = TableId.of(tabletDirPath.getParent().getName()); - return new Reference(tableId, olddelete.toString()); + return new ReferenceFile(tableId, olddelete.toString()); } else { throw new IllegalStateException("Invalid delete marker: " + olddelete); } diff --git a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java index fa1e86c240..a1ad4415e1 100644 --- a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java +++ b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java @@ -51,7 +51,7 @@ import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily; import org.apache.accumulo.core.security.Authorizations; @@ -59,7 +59,7 @@ import org.apache.accumulo.core.volume.Volume; import org.apache.accumulo.core.volume.VolumeImpl; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.fs.VolumeManager; -import org.apache.accumulo.server.gc.GcVolumeUtil; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -79,25 +79,25 @@ public class Upgrader9to10Test { public void testSwitchRelativeDeletes() { Path resolved = Upgrader9to10.resolveRelativeDelete("/5a/t-0005", VOL_PROP); assertEquals(new Path(VOL_PROP + "/tables/5a/t-0005"), resolved); - var ref1 = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId5a, "t-0005"); - var ref2 = Upgrader9to10.switchToAllVolumes(resolved); - compareReferences(ref1, ref2); + var allVolumesDir = new AllVolumesDirectory(tableId5a, "t-0005"); + var ref1 = Upgrader9to10.switchToAllVolumes(resolved); + compareReferences(allVolumesDir, ref1); resolved = Upgrader9to10.resolveRelativeDelete("/5a/" + BULK_PREFIX + "0005", VOL_PROP); assertEquals(new Path(VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"), resolved); - ref1 = new Reference(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"); - ref2 = Upgrader9to10.switchToAllVolumes(resolved); + ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"); + var ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); resolved = Upgrader9to10.resolveRelativeDelete("/5a/t-0005/F0009.rf", VOL_PROP); assertEquals(new Path(VOL_PROP + "/tables/5a/t-0005/F0009.rf"), resolved); - ref1 = new Reference(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf"); + ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf"); ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); } - private void compareReferences(Reference ref1, Reference ref2) { - assertEquals(ref1.metadataEntry, ref2.metadataEntry); + private void compareReferences(ReferenceFile ref1, ReferenceFile ref2) { + assertEquals(ref1.getMetadataEntry(), ref2.getMetadataEntry()); assertEquals(ref1.tableId, ref2.tableId); } @@ -117,20 +117,20 @@ public class Upgrader9to10Test { public void testSwitchAllVolumes() { Path resolved = Upgrader9to10 .resolveRelativeDelete("hdfs://localhost:9000/accumulo/tables/5a/t-0005", VOL_PROP); - var ref1 = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId5a, "t-0005"); - var ref2 = Upgrader9to10.switchToAllVolumes(resolved); - compareReferences(ref1, ref2); + var allVolumesDir = new AllVolumesDirectory(tableId5a, "t-0005"); + var ref1 = Upgrader9to10.switchToAllVolumes(resolved); + compareReferences(allVolumesDir, ref1); resolved = Upgrader9to10.resolveRelativeDelete( "hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005", VOL_PROP); - ref1 = new Reference(tableId5a, + ref1 = new ReferenceFile(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005"); - ref2 = Upgrader9to10.switchToAllVolumes(resolved); + var ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); resolved = Upgrader9to10.resolveRelativeDelete( "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf", VOL_PROP); - ref1 = new Reference(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf"); + ref1 = new ReferenceFile(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf"); ref2 = Upgrader9to10.switchToAllVolumes(resolved); compareReferences(ref1, ref2); } diff --git a/test/src/main/java/org/apache/accumulo/test/CloneIT.java b/test/src/main/java/org/apache/accumulo/test/CloneIT.java index a75bce9642..65aba86a99 100644 --- a/test/src/main/java/org/apache/accumulo/test/CloneIT.java +++ b/test/src/main/java/org/apache/accumulo/test/CloneIT.java @@ -265,17 +265,17 @@ public class CloneIT extends AccumuloClusterHarness { try (BatchWriter bw1 = client.createBatchWriter(tableName); BatchWriter bw2 = client.createBatchWriter(tableName)) { - bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1")); - bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2")); + bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1.rf")); + bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2.rf")); bw1.flush(); MetadataTableUtil.initializeClone(tableName, TableId.of("0"), TableId.of("1"), client, bw2); - bw1.addMutation(createTablet("0", "f", null, "/d1", filePrefix + "/d1/file3")); - bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file1")); - bw1.addMutation(createTablet("0", "s", "m", "/d2", filePrefix + "/d2/file2")); - bw1.addMutation(createTablet("0", null, "s", "/d4", filePrefix + "/d2/file2")); + bw1.addMutation(createTablet("0", "f", null, "/d1", filePrefix + "/d1/file3.rf")); + bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file1.rf")); + bw1.addMutation(createTablet("0", "s", "m", "/d2", filePrefix + "/d2/file2.rf")); + bw1.addMutation(createTablet("0", null, "s", "/d4", filePrefix + "/d2/file2.rf")); bw1.flush(); @@ -299,8 +299,8 @@ public class CloneIT extends AccumuloClusterHarness { } assertEquals(2, count); assertEquals(2, files.size()); - assertTrue(files.contains(filePrefix + "/d1/file1")); - assertTrue(files.contains(filePrefix + "/d2/file2")); + assertTrue(files.contains(filePrefix + "/d1/file1.rf")); + assertTrue(files.contains(filePrefix + "/d2/file2.rf")); } } @@ -314,22 +314,22 @@ public class CloneIT extends AccumuloClusterHarness { try (BatchWriter bw1 = client.createBatchWriter(tableName); BatchWriter bw2 = client.createBatchWriter(tableName)) { - bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1")); - bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2")); + bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1.rf")); + bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2.rf")); bw1.flush(); MetadataTableUtil.initializeClone(tableName, TableId.of("0"), TableId.of("1"), client, bw2); - bw1.addMutation(deleteTablet("0", "m", null, filePrefix + "/d1/file1")); - bw1.addMutation(deleteTablet("0", null, "m", filePrefix + "/d2/file2")); + bw1.addMutation(deleteTablet("0", "m", null, filePrefix + "/d1/file1.rf")); + bw1.addMutation(deleteTablet("0", null, "m", filePrefix + "/d2/file2.rf")); bw1.flush(); - bw1.addMutation(createTablet("0", "f", null, "/d1", filePrefix + "/d1/file3")); - bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file1")); - bw1.addMutation(createTablet("0", "s", "m", "/d2", filePrefix + "/d2/file3")); - bw1.addMutation(createTablet("0", null, "s", "/d4", filePrefix + "/d4/file3")); + bw1.addMutation(createTablet("0", "f", null, "/d1", filePrefix + "/d1/file3.rf")); + bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file1.rf")); + bw1.addMutation(createTablet("0", "s", "m", "/d2", filePrefix + "/d2/file3.rf")); + bw1.addMutation(createTablet("0", null, "s", "/d4", filePrefix + "/d4/file3.rf")); bw1.flush(); @@ -338,11 +338,11 @@ public class CloneIT extends AccumuloClusterHarness { assertEquals(1, rc); - bw1.addMutation(deleteTablet("0", "m", "f", filePrefix + "/d1/file1")); + bw1.addMutation(deleteTablet("0", "m", "f", filePrefix + "/d1/file1.rf")); bw1.flush(); - bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file3")); + bw1.addMutation(createTablet("0", "m", "f", "/d3", filePrefix + "/d1/file3.rf")); bw1.flush(); @@ -364,9 +364,9 @@ public class CloneIT extends AccumuloClusterHarness { } assertEquals(3, count); assertEquals(3, files.size()); - assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d1/file1")); - assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d2/file3")); - assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d4/file3")); + assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d1/file1.rf")); + assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d2/file3.rf")); + assertTrue(files.contains("hdfs://nn:8000/accumulo/tables/0/d4/file3.rf")); } } @@ -380,16 +380,16 @@ public class CloneIT extends AccumuloClusterHarness { try (BatchWriter bw1 = client.createBatchWriter(tableName); BatchWriter bw2 = client.createBatchWriter(tableName)) { - bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1")); - bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2")); + bw1.addMutation(createTablet("0", "m", null, "/d1", filePrefix + "/d1/file1.rf")); + bw1.addMutation(createTablet("0", null, "m", "/d2", filePrefix + "/d2/file2.rf")); bw1.flush(); MetadataTableUtil.initializeClone(tableName, TableId.of("0"), TableId.of("1"), client, bw2); - bw1.addMutation(deleteTablet("0", "m", null, filePrefix + "/d1/file1")); - Mutation mut = createTablet("0", null, null, "/d2", filePrefix + "/d2/file2"); - mut.put(DataFileColumnFamily.NAME.toString(), filePrefix + "/d1/file1", + bw1.addMutation(deleteTablet("0", "m", null, filePrefix + "/d1/file1.rf")); + Mutation mut = createTablet("0", null, null, "/d2", filePrefix + "/d2/file2.rf"); + mut.put(DataFileColumnFamily.NAME.toString(), filePrefix + "/d1/file1.rf", new DataFileValue(10, 200).encodeAsString()); bw1.addMutation(mut); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java index 4eab501a0f..28a20afe25 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java @@ -42,7 +42,7 @@ import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.gc.Reference; +import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection; @@ -124,23 +124,35 @@ public class GarbageCollectorIT extends ConfigurableMacBase { c.tableOperations().setProperty(table, Property.TABLE_SPLIT_THRESHOLD.getKey(), "5K"); VerifyParams params = new VerifyParams(getClientProperties(), table, 10_000); params.cols = 1; + log.info("Ingesting files to {}", table); TestIngest.ingest(c, cluster.getFileSystem(), params); + log.info("Compacting the table {}", table); c.tableOperations().compact(table, null, null, true, true); - int before = countFiles(); + String pathString = cluster.getConfig().getDir() + "/accumulo/tables/1/*/*.rf"; + log.info("Counting files in path: {}", pathString); + + int before = countFiles(pathString); + log.info("Counted {} files in path: {}", before, pathString); + while (true) { sleepUninterruptibly(1, TimeUnit.SECONDS); - int more = countFiles(); + int more = countFiles(pathString); if (more <= before) break; before = more; } // restart GC + log.info("Restarting GC..."); getCluster().start(); sleepUninterruptibly(15, TimeUnit.SECONDS); - int after = countFiles(); + log.info("Again Counting files in path: {}", pathString); + + int after = countFiles(pathString); + log.info("Counted {} files in path: {}", after, pathString); + VerifyIngest.verifyIngest(c, params); - assertTrue(after < before); + assertTrue(after < before, "After count " + after + " was not less than " + before); } } @@ -297,8 +309,8 @@ public class GarbageCollectorIT extends ConfigurableMacBase { } } - private int countFiles() throws Exception { - Path path = new Path(cluster.getConfig().getDir() + "/accumulo/tables/1/*/*.rf"); + private int countFiles(String pathStr) throws Exception { + Path path = new Path(pathStr); return Iterators.size(Arrays.asList(cluster.getFileSystem().globStatus(path)).iterator()); } @@ -311,7 +323,7 @@ public class GarbageCollectorIT extends ConfigurableMacBase { String longpath = "aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee" + "ffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjj"; var path = String.format("file:/%020d/%s", i, longpath); - Mutation delFlag = ample.createDeleteMutation(new Reference(TableId.of("1"), path)); + Mutation delFlag = ample.createDeleteMutation(new ReferenceFile(TableId.of("1"), path)); bw.addMutation(delFlag); } } diff --git a/test/src/main/java/org/apache/accumulo/test/upgrade/GCUpgrade9to10TestIT.java b/test/src/main/java/org/apache/accumulo/test/upgrade/GCUpgrade9to10TestIT.java index f5c1888f2a..b606832c6e 100644 --- a/test/src/main/java/org/apache/accumulo/test/upgrade/GCUpgrade9to10TestIT.java +++ b/test/src/main/java/org/apache/accumulo/test/upgrade/GCUpgrade9to10TestIT.java @@ -50,7 +50,7 @@ import org.apache.accumulo.manager.upgrade.Upgrader9to10; import org.apache.accumulo.minicluster.ServerType; import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; import org.apache.accumulo.miniclusterImpl.ProcessNotFoundException; -import org.apache.accumulo.server.gc.GcVolumeUtil; +import org.apache.accumulo.server.gc.AllVolumesDirectory; import org.apache.accumulo.test.functional.ConfigurableMacBase; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -241,8 +241,7 @@ public class GCUpgrade9to10TestIT extends ConfigurableMacBase { Mutation delFlag = createOldDelMutation(longpath, "", "", ""); bw.addMutation(delFlag); expected.put( - DeletesSection.encodeRow( - GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId, dirName).metadataEntry), + DeletesSection.encodeRow(new AllVolumesDirectory(tableId, dirName).getMetadataEntry()), Upgrader9to10.UPGRADED.toString()); }