This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1bd27a3ea87622edc869d3ff72ce9b0881451c95
Author: Qifan Chen <qc...@cloudera.com>
AuthorDate: Mon Oct 12 20:31:31 2020 -0400

    IMPALA-7097: Print EC info in the query plan and profile
    
    This fix added the functionality to show the number of erasure coded
    files and the total size of such files in the scan node in the
    query plan and profile. Shown below are two examples for the HDFS file
    system.
    
    Non-partitioned table:
    00:SCAN HDFS [default.test_show_ec_nonpart, RANDOM]
       HDFS partitions=1/1 files=2 size=1.65KB
       erasure coded: files=2 size=1.65KB
       stored statistics:
    
    Partitioned table:
    00:SCAN HDFS [default.test_show_ec_part]
       HDFS partitions=4/4 files=4 size=2.36KB
       erasure coded: files=3 size=1.77KB
       row-size=12B cardinality=999
    
    Testing:
    1. Unit testing;
    2. Ran Core tests successfully.
    
    Change-Id: I6ea378914624a714fde820d290b3b9c43325c6a1
    Reviewed-on: http://gerrit.cloudera.org:8080/16587
    Reviewed-by: Aman Sinha <amsi...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 .../org/apache/impala/planner/HdfsScanNode.java    | 30 ++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 4a3cee6..40e3e9e 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -212,6 +212,10 @@ public class HdfsScanNode extends ScanNode {
   private Map<FileSystemUtil.FsType, Long> totalFilesPerFs_ = new TreeMap<>();
   private Map<FileSystemUtil.FsType, Long> totalBytesPerFs_ = new TreeMap<>();
 
+  // Number of erasure coded files and bytes scanned, grouped by the FsType.
+  private Map<FileSystemUtil.FsType, Long> totalFilesPerFsEC_ = new TreeMap<>();
+  private Map<FileSystemUtil.FsType, Long> totalBytesPerFsEC_ = new TreeMap<>();
+
   // File formats scanned. Set in computeScanRangeLocations().
   private Set<HdfsFileFormat> fileFormats_;
 
@@ -833,6 +837,8 @@ public class HdfsScanNode extends ScanNode {
 
     totalFilesPerFs_ = new TreeMap<>();
     totalBytesPerFs_ = new TreeMap<>();
+    totalFilesPerFsEC_ = new TreeMap<>();
+    totalBytesPerFsEC_ = new TreeMap<>();
     largestScanRangeBytes_ = 0;
     maxScanRangeNumRows_ = -1;
     fileFormats_ = new HashSet<>();
@@ -883,6 +889,7 @@ public class HdfsScanNode extends ScanNode {
       boolean partitionMissingDiskIds = false;
       totalBytesPerFs_.merge(partition.getFsType(), partitionBytes, Long::sum);
       totalFilesPerFs_.merge(partition.getFsType(), (long) fileDescs.size(), Long::sum);
+
       for (FileDescriptor fileDesc: fileDescs) {
         if (!analyzer.getQueryOptions().isAllow_erasure_coded_files() &&
             fileDesc.getIsEc()) {
@@ -890,6 +897,14 @@ public class HdfsScanNode extends ScanNode {
               "Scanning of HDFS erasure-coded file (%s/%s) is not supported",
               partition.getLocation(), fileDesc.getRelativePath()));
         }
+
+        // Accumulate the number of EC files and the total size of such files.
+        if (fileDesc.getIsEc()) {
+          totalFilesPerFsEC_.merge(partition.getFsType(), 1L, Long::sum);
+          totalBytesPerFsEC_.merge(
+              partition.getFsType(), fileDesc.getFileLength(), Long::sum);
+        }
+
         if (!fsHasBlocks) {
           Preconditions.checkState(fileDesc.getNumFileBlocks() == 0);
           generateScanRangeSpecs(partition, fileDesc, scanRangeBytesLimit);
@@ -1471,6 +1486,7 @@ public class HdfsScanNode extends ScanNode {
               Expr.getExplainString(partitionConjuncts_, detailLevel)));
       }
       String partMetaTemplate = "partitions=%d/%d files=%d size=%s\n";
+      String erasureCodeTemplate = "erasure coded: files=%d size=%s\n";
       if (!numPartitionsPerFs_.isEmpty()) {
         // The table is partitioned; print a line for each filesystem we are reading
         // partitions from
@@ -1482,16 +1498,26 @@ public class HdfsScanNode extends ScanNode {
           output.append(String.format(partMetaTemplate, partsPerFs.getValue(),
               table.getPartitions().size(), totalFilesPerFs_.get(fsType),
               PrintUtils.printBytes(totalBytesPerFs_.get(fsType))));
+
+          // Report the total number of erasure coded files and total bytes, if any.
+          if (totalFilesPerFsEC_.containsKey(fsType)) {
+            long totalNumECFiles = totalFilesPerFsEC_.get(fsType);
+            long totalECSize = totalBytesPerFsEC_.get(fsType);
+            output.append(String.format("%s", detailPrefix))
+                .append(String.format(erasureCodeTemplate, totalNumECFiles,
+                    PrintUtils.printBytes(totalECSize)));
+          }
         }
       } else if (tbl_.getNumClusteringCols() == 0) {
-        // There are no partitions so we use the FsType of the base table
+        // There are no partitions so we use the FsType of the base table. No report
+        // on EC related info.
         output.append(detailPrefix);
         output.append(table.getFsType()).append(" ");
         output.append(String.format(partMetaTemplate, 1, table.getPartitions().size(),
             0, PrintUtils.printBytes(0)));
       } else {
-        // The table is partitioned, but no partitions are selected; in this case we
-        // exclude the FsType completely
+        // exclude the FsType completely. No report on EC related info.
         output.append(detailPrefix);
         output.append(String.format(partMetaTemplate, 0, table.getPartitions().size(),
             0, PrintUtils.printBytes(0)));

Reply via email to