This is an automated email from the ASF dual-hosted git repository.
jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 2418317 [CARBONDATA-3544] cli support summary statistics for all columns
2418317 is described below
commit 2418317e4b4a918441e5c120766f12ad0d2fb089
Author: QiangCai <[email protected]>
AuthorDate: Thu Oct 10 19:42:55 2019 +0800
[CARBONDATA-3544] cli support summary statistics for all columns
add option -C to show statistics for all columns
This closes #3409
---
.../command/management/CarbonCliCommand.scala | 21 ++++++++++-----
.../java/org/apache/carbondata/tool/CarbonCli.java | 5 ++++
.../org/apache/carbondata/tool/DataSummary.java | 30 +++++++++++++++++++++-
.../org/apache/carbondata/tool/FileCollector.java | 9 ++++++-
.../org/apache/carbondata/tool/CarbonCliTest.java | 29 ++++++++++++++++++---
5 files changed, 83 insertions(+), 11 deletions(-)
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
index 5dd0c12..e4fb725 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
@@ -49,12 +49,21 @@ case class CarbonCliCommand(
val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp,
tableName)(sparkSession)
setAuditTable(carbonTable)
setAuditInfo(Map("options" -> commandOptions))
- val commandArgs: Seq[String] = commandOptions.split("\\s+")
- val finalCommands = commandArgs.collect {
- case a if a.trim.equalsIgnoreCase("summary") ||
a.trim.equalsIgnoreCase("benchmark") =>
- Seq(a, "-p", carbonTable.getTablePath)
- case x => Seq(x.trim)
- }.flatten
+ val commandArgs: Seq[String] = commandOptions.split("\\s+").map(_.trim)
+ val finalCommands = commandArgs.exists(_.equalsIgnoreCase("-p")) match {
+ case true =>
+ commandArgs
+ case false =>
+ val needPath = commandArgs.exists { command =>
+ command.equalsIgnoreCase("summary") ||
command.equalsIgnoreCase("benchmark")
+ }
+ needPath match {
+ case true =>
+ commandArgs ++ Seq("-p", carbonTable.getTablePath)
+ case false =>
+ commandArgs
+ }
+ }
val summaryOutput = new util.ArrayList[String]()
CarbonCli.run(finalCommands.toArray, summaryOutput, false)
summaryOutput.asScala.map(x =>
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index ef9a50e..6cef91a 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -79,6 +79,10 @@ public class CarbonCli {
.withDescription("column to print statistics")
.withLongOpt("column")
.create("c");
+ Option columns = OptionBuilder
+ .withDescription("print statistics for all columns")
+ .withLongOpt("columns")
+ .create("C");
Option blockletDetail =
OptionBuilder.withArgName("limitSize").hasOptionalArg()
.withDescription("print blocklet size detail").withLongOpt("limitSize")
@@ -101,6 +105,7 @@ public class CarbonCli {
options.addOption(blockletDetail);
options.addOption(columnMeta);
options.addOption(columnName);
+ options.addOption(columns);
options.addOption(version);
options.addOption(blockLevelDetail);
return options;
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
index 1930bf5..31ab535 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
@@ -108,7 +108,9 @@ class DataSummary implements Command {
collectColumnChunkMeta(columName);
}
}
-
+ if (line.hasOption("C")) {
+ printAllColumnStats();
+ }
collector.close();
for (DataFile file : dataFiles.values()) {
file.close();
@@ -371,6 +373,32 @@ class DataSummary implements Command {
printer.collectFormattedData();
}
+ private void printAllColumnStats() {
+ if (!dataFiles.isEmpty()) {
+ outPuts.add("");
+ outPuts.add("## Statistics for All Columns");
+ String[] header =
+ new String[] { "Block", "Blocklet", "Column Name", "Meta Size",
"Data Size" };
+ ShardPrinter printer = new ShardPrinter(header, outPuts);
+ for (Map.Entry<String, DataFile> entry : dataFiles.entrySet()) {
+ DataFile dataFile = entry.getValue();
+ List<ColumnSchema> columns = dataFile.getSchema();
+ int columnNum = columns.size();
+ int blockletNum = dataFile.getNumBlocklet();
+ for (int j = 0; j < blockletNum; j++) {
+ for (int i = 0; i < columnNum; i++) {
+ printer.addRow(dataFile.getShardName(),
+ new String[] { dataFile.getPartNo(), String.valueOf(j),
+ columns.get(i).getColumnName(),
+ Strings.formatSize(dataFile.getColumnMetaSizeInBytes(j,
i)),
+ Strings.formatSize(dataFile.getColumnDataSizeInBytes(j,
i)) });
+ }
+ }
+ }
+ printer.collectFormattedData();
+ }
+ }
+
private void collectStats(String columnName) throws IOException,
MemoryException {
if (!collected) {
for (DataFile dataFile : dataFiles.values()) {
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
index 6c7eaf9..66daa0d 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
@@ -57,7 +57,14 @@ class FileCollector {
void collectFiles(String dataFolder) throws IOException {
Set<String> shards = new HashSet<>();
CarbonFile folder = FileFactory.getCarbonFile(dataFolder);
- List<CarbonFile> files = folder.listFiles(true);
+ List<CarbonFile> files = new ArrayList<>();
+ if (folder.exists()) {
+ if (folder.isDirectory()) {
+ files = folder.listFiles(true);
+ } else {
+ files.add(folder);
+ }
+ }
List<DataFile> unsortedFiles = new ArrayList<>();
for (CarbonFile file : files) {
if (isColumnarFile(file.getName())) {
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index 4d89777..f6ff49c 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -24,8 +24,11 @@ import java.io.PrintStream;
import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.constants.CarbonVersionConstants;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.sdk.file.*;
import org.apache.commons.io.FileUtils;
@@ -253,7 +256,6 @@ public class CarbonCliTest {
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
- System.out.println(output);
String expectedOutput = buildLines(
"Blocklet 0:",
"Page 0 (offset 0, length 9):
DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED,
total_uncompressed_size:96000, total_compressed_size:9,
compressor_name:snappy), rowMajor:false, data_page_length:5, rle_page_length:4,
presence:PresenceMeta(represents_presence:false, present_bit_stream:00),
sort_state:SORT_NATIVE, encoders:[RLE], encoder_meta:[],
min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F
62 6F 74 30], min_max_presence:[true] [...]
@@ -261,13 +263,34 @@ public class CarbonCliTest {
}
@Test
+ public void testSummaryAllColumns() {
+ String[] args = { "-cmd", "summary", "-p", path, "-C" };
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PrintStream stream = new PrintStream(out);
+ CarbonCli.run(args, stream);
+ String output = new String(out.toByteArray());
+ Assert.assertTrue(output.contains("Block Blocklet Column Name Meta Size
Data Size"));
+ }
+
+ @Test
+ public void testSummaryAllColumnsForOneFile() {
+ CarbonFile folder = FileFactory.getCarbonFile(path);
+ CarbonFile[] carbonFiles =
+ folder.listFiles(file ->
file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT));
+ String[] args = { "-cmd", "summary", "-p",
carbonFiles[0].getCanonicalPath(), "-C" };
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PrintStream stream = new PrintStream(out);
+ CarbonCli.run(args, stream);
+ String output = new String(out.toByteArray());
+ Assert.assertTrue(output.contains("Block Blocklet Column Name Meta Size
Data Size"));
+ }
+
+ @Test
public void testBenchmark() {
String[] args = {"-cmd", "benchmark", "-p", path, "-a", "-c", "name"};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
- String output = new String(out.toByteArray());
- System.out.println(output);
}
@Test