This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 7190ab6a5 GH-3372: Enhance ColumnSizeCommand to sort by size (#3371)
7190ab6a5 is described below
commit 7190ab6a571c3cffd31e267042c193e90c0301ad
Author: Gang Wu <[email protected]>
AuthorDate: Tue Jan 20 17:42:25 2026 +0800
GH-3372: Enhance ColumnSizeCommand to sort by size (#3371)
- Support sorting columns by size in descending order.
- Support printing the size ratio as a percentage.
---
.../parquet/cli/commands/ColumnSizeCommand.java | 72 ++++++++++++++++++++--
1 file changed, 67 insertions(+), 5 deletions(-)
diff --git
a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ColumnSizeCommand.java
b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ColumnSizeCommand.java
index 6de1c7bad..6694fb8ee 100644
---
a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ColumnSizeCommand.java
+++
b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ColumnSizeCommand.java
@@ -24,8 +24,11 @@ import com.beust.jcommander.Parameters;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -56,6 +59,18 @@ public class ColumnSizeCommand extends BaseCommand {
required = false)
List<String> columns;
+ @Parameter(
+ names = {"-s", "--sort"},
+ description = "Sort columns by size in descending order",
+ required = false)
+ boolean sortBySize = false;
+
+ @Parameter(
+ names = {"-p", "--percentage"},
+ description = "Print ratio as percentage instead of decimal",
+ required = false)
+ boolean printAsPercentage = false;
+
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
@@ -67,6 +82,10 @@ public class ColumnSizeCommand extends BaseCommand {
// If user defined columns, only print out size for those columns
if (columns != null && !columns.isEmpty()) {
+ // Collect aggregated column data
+ Map<String, Long> aggregatedSizes = new LinkedHashMap<>();
+ Map<String, Float> aggregatedRatios = new LinkedHashMap<>();
+
for (String inputColumn : columns) {
long size = 0;
float ratio = 0;
@@ -76,18 +95,52 @@ public class ColumnSizeCommand extends BaseCommand {
ratio += columnRatio.get(column);
}
}
- console.info(inputColumn + "->" + " Size In Bytes: " + size + " Size
In Ratio: " + ratio);
+ aggregatedSizes.put(inputColumn, size);
+ aggregatedRatios.put(inputColumn, ratio);
+ }
+
+ // Sort if requested
+ List<Map.Entry<String, Long>> entries = new
ArrayList<>(aggregatedSizes.entrySet());
+ if (sortBySize) {
+ entries.sort(Map.Entry.<String, Long>comparingByValue().reversed());
+ }
+
+ // Print results
+ for (Map.Entry<String, Long> entry : entries) {
+ String column = entry.getKey();
+ long size = entry.getValue();
+ float ratio = aggregatedRatios.get(column);
+ String ratioStr = formatRatio(ratio);
+ console.info(column + "->" + " Size In Bytes: " + size + " Size In
Ratio: " + ratioStr);
}
} else {
- for (String column : columnSizes.keySet()) {
- console.info(column + "->" + " Size In Bytes: " +
columnSizes.get(column) + " Size In Ratio: "
- + columnRatio.get(column));
+ // Sort if requested
+ List<Map.Entry<String, Long>> entries = new
ArrayList<>(columnSizes.entrySet());
+ if (sortBySize) {
+ entries.sort(Map.Entry.<String, Long>comparingByValue().reversed());
+ }
+
+ // Print results
+ for (Map.Entry<String, Long> entry : entries) {
+ String column = entry.getKey();
+ long size = entry.getValue();
+ float ratio = columnRatio.get(column);
+ String ratioStr = formatRatio(ratio);
+ console.info(column + "->" + " Size In Bytes: " + size + " Size In
Ratio: " + ratioStr);
}
}
return 0;
}
+ private String formatRatio(float ratio) {
+ if (printAsPercentage) {
+ return String.format(Locale.US, "%.4f%%", ratio * 100);
+ } else {
+ return String.valueOf(ratio);
+ }
+ }
+
@Override
public List<String> getExamples() {
return Lists.newArrayList(
@@ -96,7 +149,16 @@ public class ColumnSizeCommand extends BaseCommand {
"sample.parquet -c col_1",
"sample.parquet --column col_2",
"sample.parquet --columns col_1 col_2",
- "sample.parquet --columns col_1 col_2.sub_col_a");
+ "sample.parquet --columns col_1 col_2.sub_col_a",
+ "# Sort columns by size in descending order",
+ "sample.parquet --sort",
+ "sample.parquet -s",
+ "# Print ratio as percentage",
+ "sample.parquet --percentage",
+ "sample.parquet -p",
+ "# Combine sorting and percentage formatting",
+ "sample.parquet --sort --percentage",
+ "sample.parquet -s -p -c col_1 col_2");
}
// Make it public to allow some automation tools to call it