This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 2418317  [CARBONDATA-3544] cli support summary statistics for all columns
2418317 is described below

commit 2418317e4b4a918441e5c120766f12ad0d2fb089
Author: QiangCai <qiang...@qq.com>
AuthorDate: Thu Oct 10 19:42:55 2019 +0800

    [CARBONDATA-3544] cli support summary statistics for all columns
    
    Add option -C to the CLI summary command to print meta size and data size statistics for all columns.
    
    This closes #3409
---
 .../command/management/CarbonCliCommand.scala      | 21 ++++++++++-----
 .../java/org/apache/carbondata/tool/CarbonCli.java |  5 ++++
 .../org/apache/carbondata/tool/DataSummary.java    | 30 +++++++++++++++++++++-
 .../org/apache/carbondata/tool/FileCollector.java  |  9 ++++++-
 .../org/apache/carbondata/tool/CarbonCliTest.java  | 29 ++++++++++++++++++---
 5 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
index 5dd0c12..e4fb725 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
@@ -49,12 +49,21 @@ case class CarbonCliCommand(
     val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, 
tableName)(sparkSession)
     setAuditTable(carbonTable)
     setAuditInfo(Map("options" -> commandOptions))
-    val commandArgs: Seq[String] = commandOptions.split("\\s+")
-    val finalCommands = commandArgs.collect {
-      case a if a.trim.equalsIgnoreCase("summary") || 
a.trim.equalsIgnoreCase("benchmark") =>
-        Seq(a, "-p", carbonTable.getTablePath)
-      case x => Seq(x.trim)
-    }.flatten
+    val commandArgs: Seq[String] = commandOptions.split("\\s+").map(_.trim)
+    val finalCommands = commandArgs.exists(_.equalsIgnoreCase("-p")) match {
+      case true =>
+        commandArgs
+      case false =>
+        val needPath = commandArgs.exists { command =>
+          command.equalsIgnoreCase("summary") || 
command.equalsIgnoreCase("benchmark")
+        }
+        needPath match {
+          case true =>
+            commandArgs ++ Seq("-p", carbonTable.getTablePath)
+          case false =>
+            commandArgs
+        }
+    }
     val summaryOutput = new util.ArrayList[String]()
     CarbonCli.run(finalCommands.toArray, summaryOutput, false)
     summaryOutput.asScala.map(x =>
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index ef9a50e..6cef91a 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -79,6 +79,10 @@ public class CarbonCli {
         .withDescription("column to print statistics")
         .withLongOpt("column")
         .create("c");
+    Option columns = OptionBuilder
+        .withDescription("print statistics for all columns")
+        .withLongOpt("columns")
+        .create("C");
 
     Option blockletDetail = 
OptionBuilder.withArgName("limitSize").hasOptionalArg()
         .withDescription("print blocklet size detail").withLongOpt("limitSize")
@@ -101,6 +105,7 @@ public class CarbonCli {
     options.addOption(blockletDetail);
     options.addOption(columnMeta);
     options.addOption(columnName);
+    options.addOption(columns);
     options.addOption(version);
     options.addOption(blockLevelDetail);
     return options;
diff --git 
a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
index 1930bf5..31ab535 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
@@ -108,7 +108,9 @@ class DataSummary implements Command {
         collectColumnChunkMeta(columName);
       }
     }
-
+    if (line.hasOption("C")) {
+      printAllColumnStats();
+    }
     collector.close();
     for (DataFile file : dataFiles.values()) {
       file.close();
@@ -371,6 +373,32 @@ class DataSummary implements Command {
     printer.collectFormattedData();
   }
 
+  private void printAllColumnStats() {
+    if (!dataFiles.isEmpty()) {
+      outPuts.add("");
+      outPuts.add("## Statistics for All Columns");
+      String[] header =
+          new String[] { "Block", "Blocklet", "Column Name", "Meta Size", 
"Data Size" };
+      ShardPrinter printer = new ShardPrinter(header, outPuts);
+      for (Map.Entry<String, DataFile> entry : dataFiles.entrySet()) {
+        DataFile dataFile = entry.getValue();
+        List<ColumnSchema> columns = dataFile.getSchema();
+        int columnNum = columns.size();
+        int blockletNum = dataFile.getNumBlocklet();
+        for (int j = 0; j < blockletNum; j++) {
+          for (int i = 0; i < columnNum; i++) {
+            printer.addRow(dataFile.getShardName(),
+                new String[] { dataFile.getPartNo(), String.valueOf(j),
+                    columns.get(i).getColumnName(),
+                    Strings.formatSize(dataFile.getColumnMetaSizeInBytes(j, 
i)),
+                    Strings.formatSize(dataFile.getColumnDataSizeInBytes(j, 
i)) });
+          }
+        }
+      }
+      printer.collectFormattedData();
+    }
+  }
+
   private void collectStats(String columnName) throws IOException, 
MemoryException {
     if (!collected) {
       for (DataFile dataFile : dataFiles.values()) {
diff --git 
a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
index 6c7eaf9..66daa0d 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
@@ -57,7 +57,14 @@ class FileCollector {
   void collectFiles(String dataFolder) throws IOException {
     Set<String> shards = new HashSet<>();
     CarbonFile folder = FileFactory.getCarbonFile(dataFolder);
-    List<CarbonFile> files = folder.listFiles(true);
+    List<CarbonFile> files = new ArrayList<>();
+    if (folder.exists()) {
+      if (folder.isDirectory()) {
+        files = folder.listFiles(true);
+      } else {
+        files.add(folder);
+      }
+    }
     List<DataFile> unsortedFiles = new ArrayList<>();
     for (CarbonFile file : files) {
       if (isColumnarFile(file.getName())) {
diff --git 
a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java 
b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index 4d89777..f6ff49c 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -24,8 +24,11 @@ import java.io.PrintStream;
 
 import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
 import org.apache.carbondata.core.constants.CarbonVersionConstants;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.sdk.file.*;
 
 import org.apache.commons.io.FileUtils;
@@ -253,7 +256,6 @@ public class CarbonCliTest {
     PrintStream stream = new PrintStream(out);
     CarbonCli.run(args, stream);
     String output = new String(out.toByteArray());
-    System.out.println(output);
     String expectedOutput = buildLines(
         "Blocklet 0:",
         "Page 0 (offset 0, length 9): 
DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, 
total_uncompressed_size:96000, total_compressed_size:9, 
compressor_name:snappy), rowMajor:false, data_page_length:5, rle_page_length:4, 
presence:PresenceMeta(represents_presence:false, present_bit_stream:00), 
sort_state:SORT_NATIVE, encoders:[RLE], encoder_meta:[], 
min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 
62 6F 74 30], min_max_presence:[true] [...]
@@ -261,13 +263,34 @@ public class CarbonCliTest {
   }
 
   @Test
+  public void testSummaryAllColumns() {
+    String[] args = { "-cmd", "summary", "-p", path, "-C" };
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    PrintStream stream = new PrintStream(out);
+    CarbonCli.run(args, stream);
+    String output = new String(out.toByteArray());
+    Assert.assertTrue(output.contains("Block  Blocklet  Column Name  Meta Size 
 Data Size"));
+  }
+
+  @Test
+  public void testSummaryAllColumnsForOneFile() {
+    CarbonFile folder = FileFactory.getCarbonFile(path);
+    CarbonFile[] carbonFiles =
+        folder.listFiles(file -> 
file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT));
+    String[] args = { "-cmd", "summary", "-p", 
carbonFiles[0].getCanonicalPath(), "-C" };
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    PrintStream stream = new PrintStream(out);
+    CarbonCli.run(args, stream);
+    String output = new String(out.toByteArray());
+    Assert.assertTrue(output.contains("Block  Blocklet  Column Name  Meta Size 
 Data Size"));
+  }
+
+  @Test
   public void testBenchmark() {
     String[] args = {"-cmd", "benchmark", "-p", path, "-a", "-c", "name"};
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     PrintStream stream = new PrintStream(out);
     CarbonCli.run(args, stream);
-    String output = new String(out.toByteArray());
-    System.out.println(output);
   }
 
   @Test

Reply via email to