This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1bd27a3ea87622edc869d3ff72ce9b0881451c95 Author: Qifan Chen <qc...@cloudera.com> AuthorDate: Mon Oct 12 20:31:31 2020 -0400 IMPALA-7097 Print EC info in the query plan and profile This fix added the functionality to show the number of erasure coded files and the total size of such files in the scan node in the query plan and profile. Shown below are two examples for the HDFS file system. Non-partitioned table: 00:SCAN HDFS [default.test_show_ec_nonpart, RANDOM] HDFS partitions=1/1 files=2 size=1.65KB erasure coded: files=2 size=1.65KB stored statistics: Partitioned table: 00:SCAN HDFS [default.test_show_ec_part] HDFS partitions=4/4 files=4 size=2.36KB erasure coded: files=3 size=1.77KB row-size=12B cardinality=999 Testing: 1. Unit testing; 2. Ran Core tests successfully. Change-Id: I6ea378914624a714fde820d290b3b9c43325c6a1 Reviewed-on: http://gerrit.cloudera.org:8080/16587 Reviewed-by: Aman Sinha <amsi...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- .../org/apache/impala/planner/HdfsScanNode.java | 30 ++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index 4a3cee6..40e3e9e 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -212,6 +212,10 @@ public class HdfsScanNode extends ScanNode { private Map<FileSystemUtil.FsType, Long> totalFilesPerFs_ = new TreeMap<>(); private Map<FileSystemUtil.FsType, Long> totalBytesPerFs_ = new TreeMap<>(); + // Number of erasure coded files and bytes scanned, grouped by the FsType. + private Map<FileSystemUtil.FsType, Long> totalFilesPerFsEC_ = new TreeMap<>(); + private Map<FileSystemUtil.FsType, Long> totalBytesPerFsEC_ = new TreeMap<>(); + // File formats scanned. Set in computeScanRangeLocations(). 
private Set<HdfsFileFormat> fileFormats_; @@ -833,6 +837,8 @@ public class HdfsScanNode extends ScanNode { totalFilesPerFs_ = new TreeMap<>(); totalBytesPerFs_ = new TreeMap<>(); + totalFilesPerFsEC_ = new TreeMap<>(); + totalBytesPerFsEC_ = new TreeMap<>(); largestScanRangeBytes_ = 0; maxScanRangeNumRows_ = -1; fileFormats_ = new HashSet<>(); @@ -883,6 +889,7 @@ public class HdfsScanNode extends ScanNode { boolean partitionMissingDiskIds = false; totalBytesPerFs_.merge(partition.getFsType(), partitionBytes, Long::sum); totalFilesPerFs_.merge(partition.getFsType(), (long) fileDescs.size(), Long::sum); + for (FileDescriptor fileDesc: fileDescs) { if (!analyzer.getQueryOptions().isAllow_erasure_coded_files() && fileDesc.getIsEc()) { @@ -890,6 +897,14 @@ public class HdfsScanNode extends ScanNode { "Scanning of HDFS erasure-coded file (%s/%s) is not supported", partition.getLocation(), fileDesc.getRelativePath())); } + + // Accumulate the number of EC files and the total size of such files. 
+ if (fileDesc.getIsEc()) { + totalFilesPerFsEC_.merge(partition.getFsType(), 1L, Long::sum); + totalBytesPerFsEC_.merge( + partition.getFsType(), fileDesc.getFileLength(), Long::sum); + } + if (!fsHasBlocks) { Preconditions.checkState(fileDesc.getNumFileBlocks() == 0); generateScanRangeSpecs(partition, fileDesc, scanRangeBytesLimit); @@ -1471,6 +1486,7 @@ public class HdfsScanNode extends ScanNode { Expr.getExplainString(partitionConjuncts_, detailLevel))); } String partMetaTemplate = "partitions=%d/%d files=%d size=%s\n"; + String erasureCodeTemplate = "erasure coded: files=%d size=%s\n"; if (!numPartitionsPerFs_.isEmpty()) { // The table is partitioned; print a line for each filesystem we are reading // partitions from @@ -1482,16 +1498,26 @@ public class HdfsScanNode extends ScanNode { output.append(String.format(partMetaTemplate, partsPerFs.getValue(), table.getPartitions().size(), totalFilesPerFs_.get(fsType), PrintUtils.printBytes(totalBytesPerFs_.get(fsType)))); + + // Report the total number of erasure coded files and total bytes, if any. + if (totalFilesPerFsEC_.containsKey(fsType)) { + long totalNumECFiles = totalFilesPerFsEC_.get(fsType); + long totalECSize = totalBytesPerFsEC_.get(fsType); + output.append(String.format("%s", detailPrefix)) + .append(String.format(erasureCodeTemplate, totalNumECFiles, + PrintUtils.printBytes(totalECSize))); + } } } else if (tbl_.getNumClusteringCols() == 0) { - // There are no partitions so we use the FsType of the base table + // There are no partitions so we use the FsType of the base table. No report + // on EC related info. output.append(detailPrefix); output.append(table.getFsType()).append(" "); output.append(String.format(partMetaTemplate, 1, table.getPartitions().size(), 0, PrintUtils.printBytes(0))); } else { // The table is partitioned, but no partitions are selected; in this case we - // exclude the FsType completely + // exclude the FsType completely. No report on EC related info. 
output.append(detailPrefix); output.append(String.format(partMetaTemplate, 0, table.getPartitions().size(), 0, PrintUtils.printBytes(0)));