dlmarion commented on code in PR #2900:
URL: https://github.com/apache/accumulo/pull/2900#discussion_r977837640
##########
core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java:
##########
@@ -1076,11 +1076,21 @@ Map<String,Integer> listConstraints(String tableName)
throws AccumuloException, TableNotFoundException;
/**
- * Gets the number of bytes being used in the files for a set of tables
+ * Gets the number of bytes being used by the files for a set of tables.
This operation will use
+ * scan the metadata table for file size information to compute the size
metrics for the tables.
+ *
+ * Because the metadata table is used for computing usage and not the actual
files in HDFS the
+ * results will be an estimate. Older entries may exist with no file
metadata (resulting in size
+ * 0) and other actions in the cluster can impact the estimated size such as
flushes, tablet
+ * splits, compactions, etc.
+ *
+ * For the most accurate information a compaction should first be run on the
set of tables being
Review Comment:
```suggestion
* For more accurate information a compaction should first be run on all files for the set of tables being
```
##########
core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java:
##########
@@ -1076,11 +1076,21 @@ Map<String,Integer> listConstraints(String tableName)
throws AccumuloException, TableNotFoundException;
/**
- * Gets the number of bytes being used in the files for a set of tables
+ * Gets the number of bytes being used by the files for a set of tables.
This operation will use
Review Comment:
```suggestion
* Gets the number of bytes being used by the files for a set of tables. This operation will
```
##########
shell/src/main/java/org/apache/accumulo/shell/commands/DUCommand.java:
##########
@@ -95,9 +107,14 @@ public int execute(final String fullCommand, final
CommandLine cl, final Shell s
@Override
public String description() {
- return "prints how much space, in bytes, is used by files referenced by a"
- + " table. When multiple tables are specified it prints how much
space, in"
- + " bytes, is used by files shared between tables, if any.";
+ return "Prints how much estimated space, in bytes, is used by files
referenced by a "
Review Comment:
```suggestion
return "Prints estimated space, in bytes, used by files referenced by a "
```
##########
shell/src/main/java/org/apache/accumulo/shell/commands/DUCommand.java:
##########
@@ -95,9 +107,14 @@ public int execute(final String fullCommand, final
CommandLine cl, final Shell s
@Override
public String description() {
- return "prints how much space, in bytes, is used by files referenced by a"
- + " table. When multiple tables are specified it prints how much
space, in"
- + " bytes, is used by files shared between tables, if any.";
+ return "Prints how much estimated space, in bytes, is used by files
referenced by a "
+ + "table or tables. When multiple tables are specified it prints how
much space, in "
+ + "bytes, are used by files shared between tables, if any. Because the
metadata table "
+ + "is used for the file size information and not the actual files in
HDFS the results "
+ + "will be an estimate. Older entries may exist with no file metadata
(resulting in size 0) and "
+ + "other actions in the cluster can impact the estimated size such as
flushes, tablet splits, "
+ + "compactions, etc. For the most accurate information a compaction
should first be run on the "
Review Comment:
```suggestion
+ "compactions, etc. For more accurate information a compaction should first be run on all of the files for the "
```
##########
server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java:
##########
@@ -42,25 +44,40 @@
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.metadata.MetadataTable;
-import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.metadata.RootTable;
+import org.apache.accumulo.core.metadata.TabletFile;
+import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.trace.TraceUtil;
import org.apache.accumulo.core.util.NumUtil;
import org.apache.accumulo.server.cli.ServerUtilOpts;
import org.apache.accumulo.server.fs.VolumeManager;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.beust.jcommander.Parameter;
-import com.google.common.base.Joiner;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
+/**
+ * This utility class will scan the Accumulo Metadata table to compute the
disk usage for a table or
+ * table(s) by using the size value stored in columns that contain the column
family
+ * {@link MetadataSchema.TabletsSection.DataFileColumnFamily}.
+ *
+ * This class will also track shared files to computed shared usage across all
tables that are
+ * provided as part of the Set of tables when getting disk usage.
+ *
+ * Because the metadata table is used for computing usage and not the actual
files in HDFS the
+ * results will be an estimate. Older entries may exist with no file metadata
(resulting in size 0)
+ * and other actions in the cluster can impact the estimated size such as
flushes, tablet splits,
+ * compactions, etc.
+ *
+ * For the most accurate information a compaction should first be run on the
set of tables being
Review Comment:
```suggestion
* For more accurate information a compaction should first be run on all of the files for the set of tables being
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]