http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/fe/src/main/java/org/apache/impala/analysis/SlotRef.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java index 23f2d88..0a945bd 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java @@ -153,26 +153,6 @@ public class SlotRef extends Expr { return "<slot " + Integer.toString(desc_.getId().asInt()) + ">"; } - /** - * Checks if this slotRef refers to an array "pos" pseudo-column. - * - * Note: checking whether the column is null distinguishes between top-level columns - * and nested types. This check more specifically looks just for a reference to the - * "pos" field of an array type. - */ - public boolean isArrayPosRef() { - TupleDescriptor parent = getDesc().getParent(); - if (parent == null) return false; - Type parentType = parent.getType(); - if (parentType instanceof CollectionStructType) { - if (((CollectionStructType)parentType).isArrayStruct() && - getDesc().getLabel().equals(Path.ARRAY_POS_FIELD_NAME)) { - return true; - } - } - return false; - } - @Override protected void toThrift(TExprNode msg) { msg.node_type = TExprNodeType.SLOT_REF;
http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index ac67d7d..a1f47aa 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -59,6 +59,7 @@ import org.apache.impala.common.ImpalaException; import org.apache.impala.common.ImpalaRuntimeException; import org.apache.impala.common.InternalException; import org.apache.impala.common.NotImplementedException; +import org.apache.impala.common.Pair; import org.apache.impala.common.PrintUtils; import org.apache.impala.common.RuntimeEnv; import org.apache.impala.fb.FbFileBlock; @@ -76,6 +77,7 @@ import org.apache.impala.thrift.TScanRange; import org.apache.impala.thrift.TScanRangeLocation; import org.apache.impala.thrift.TScanRangeLocationList; import org.apache.impala.thrift.TTableStats; +import org.apache.impala.util.BitUtil; import org.apache.impala.util.MembershipSnapshot; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -143,7 +145,7 @@ public class HdfsScanNode extends ScanNode { // derived experimentally: running metadata-only Parquet count(*) scans on TPC-H // lineitem and TPC-DS store_sales of different sizes resulted in memory consumption // between 128kb and 1.1mb. - private final static long MIN_MEMORY_ESTIMATE = 1 * 1024 * 1024; + private static final long MIN_MEMORY_ESTIMATE = 1L * 1024L * 1024L; private final HdfsTable tbl_; @@ -166,6 +168,18 @@ public class HdfsScanNode extends ScanNode { private long totalFiles_ = 0; private long totalBytes_ = 0; + // File formats scanned. Set in computeScanRangeLocations(). + private Set<HdfsFileFormat> fileFormats_; + + // Number of bytes in the largest scan range (i.e. hdfs split). 
Set in + // computeScanRangeLocations(). + private long maxScanRangeBytes_ = 0; + + // The ideal reservation to process a single scan range (i.e. hdfs split), >= the + // minimum reservation. Generally provides enough memory to overlap CPU and I/O and + // maximize throughput. Set in computeResourceProfile(). + private long idealScanRangeReservation_ = -1; + // Input cardinality based on the partition row counts or extrapolation. -1 if invalid. // Both values can be valid to report them in the explain plan, but only one of them is // used for determining the scan cardinality. @@ -329,26 +343,26 @@ public class HdfsScanNode extends ScanNode { computeDictionaryFilterConjuncts(analyzer); // compute scan range locations with optional sampling - Set<HdfsFileFormat> fileFormats = computeScanRangeLocations(analyzer); + computeScanRangeLocations(analyzer); // Determine backend scan node implementation to use. The optimized MT implementation // is currently supported for Parquet, ORC and Text. if (analyzer.getQueryOptions().isSetMt_dop() && analyzer.getQueryOptions().mt_dop > 0 && - fileFormats.size() == 1 && - (fileFormats.contains(HdfsFileFormat.PARQUET) - || fileFormats.contains(HdfsFileFormat.ORC) - || fileFormats.contains(HdfsFileFormat.TEXT))) { + fileFormats_.size() == 1 && + (fileFormats_.contains(HdfsFileFormat.PARQUET) + || fileFormats_.contains(HdfsFileFormat.ORC) + || fileFormats_.contains(HdfsFileFormat.TEXT))) { useMtScanNode_ = true; } else { useMtScanNode_ = false; } - if (fileFormats.contains(HdfsFileFormat.PARQUET)) { + if (fileFormats_.contains(HdfsFileFormat.PARQUET)) { computeMinMaxTupleAndConjuncts(analyzer); } - if (canApplyParquetCountStarOptimization(analyzer, fileFormats)) { + if (canApplyParquetCountStarOptimization(analyzer, fileFormats_)) { Preconditions.checkState(desc_.getPath().destTable() != null); Preconditions.checkState(collectionConjuncts_.isEmpty()); countStarSlot_ = applyParquetCountStartOptimization(analyzer); @@ -461,7 +475,7 @@ 
public class HdfsScanNode extends ScanNode { // This node is a table scan, so this must be a scanning slot. Preconditions.checkState(slotRef.getDesc().isScanSlot()); // Skip the slot ref if it refers to an array's "pos" field. - if (slotRef.isArrayPosRef()) return; + if (slotRef.getDesc().isArrayPosRef()) return; Expr constExpr = binaryPred.getChild(1); // Only constant exprs can be evaluated against parquet::Statistics. This includes @@ -489,7 +503,7 @@ public class HdfsScanNode extends ScanNode { // This node is a table scan, so this must be a scanning slot. Preconditions.checkState(slotRef.getDesc().isScanSlot()); // Skip the slot ref if it refers to an array's "pos" field. - if (slotRef.isArrayPosRef()) return; + if (slotRef.getDesc().isArrayPosRef()) return; if (inPred.isNotIn()) return; ArrayList<Expr> children = inPred.getChildren(); @@ -696,13 +710,15 @@ public class HdfsScanNode extends ScanNode { } /** - * Computes scan ranges (hdfs splits) plus their storage locations, including volume - * ids, based on the given maximum number of bytes each scan range should scan. + * Computes scan ranges (i.e. hdfs splits) plus their storage locations, including + * volume ids, based on the given maximum number of bytes each scan range should scan. * If 'sampleParams_' is not null, generates a sample and computes the scan ranges * based on the sample. - * Returns the set of file formats being scanned. + * + * Initializes members with information about files and scan ranges, e.g. totalFiles_, + * fileFormats_, etc. */ - private Set<HdfsFileFormat> computeScanRangeLocations(Analyzer analyzer) + private void computeScanRangeLocations(Analyzer analyzer) throws ImpalaRuntimeException { Map<Long, List<FileDescriptor>> sampledFiles = null; if (sampleParams_ != null) { @@ -725,7 +741,8 @@ public class HdfsScanNode extends ScanNode { numPartitions_ = (sampledFiles != null) ? 
sampledFiles.size() : partitions_.size(); totalFiles_ = 0; totalBytes_ = 0; - Set<HdfsFileFormat> fileFormats = Sets.newHashSet(); + maxScanRangeBytes_ = 0; + fileFormats_ = Sets.newHashSet(); for (HdfsPartition partition: partitions_) { List<FileDescriptor> fileDescs = partition.getFileDescriptors(); if (sampledFiles != null) { @@ -735,7 +752,7 @@ public class HdfsScanNode extends ScanNode { } analyzer.getDescTbl().addReferencedPartition(tbl_, partition.getId()); - fileFormats.add(partition.getFileFormat()); + fileFormats_.add(partition.getFileFormat()); Preconditions.checkState(partition.getId() >= 0); // Missing disk id accounting is only done for file systems that support the notion // of disk/storage ids. @@ -798,6 +815,7 @@ public class HdfsScanNode extends ScanNode { scanRangeLocations.scan_range = scanRange; scanRangeLocations.locations = locations; scanRanges_.add(scanRangeLocations); + maxScanRangeBytes_ = Math.max(maxScanRangeBytes_, currentLength); remainingLength -= currentLength; currentOffset += currentLength; } @@ -812,7 +830,58 @@ public class HdfsScanNode extends ScanNode { } if (partitionMissingDiskIds) ++numPartitionsNoDiskIds_; } - return fileFormats; + } + + /** + * Compute the number of columns that are read from the file, as opposed to + * materialised based on metadata. If there are nested collections, counts the + * number of leaf scalar slots per collection. This matches Parquet's "shredded" + * approach to nested collections, where each nested field is stored as a separate + * column. We may need to adjust this logic for non-shredded columnar formats if added. 
+ */ + private int computeNumColumnsReadFromFile() { + HdfsTable table = (HdfsTable) desc_.getTable(); + int numColumns = 0; + boolean havePosSlot = false; + for (SlotDescriptor slot: desc_.getSlots()) { + if (!slot.isMaterialized() || slot == countStarSlot_) continue; + if (slot.getColumn() == null || + slot.getColumn().getPosition() >= table.getNumClusteringCols()) { + if (slot.isArrayPosRef()) { + // Position virtual slots can be materialized by piggybacking on another slot. + havePosSlot = true; + } else if (slot.getType().isScalarType()) { + ++numColumns; + } else { + numColumns += computeNumColumnsReadForCollection(slot); + } + } + } + // Must scan something to materialize a position slot. + if (havePosSlot) numColumns = Math.max(numColumns, 1); + return numColumns; + } + + /** + * Compute the number of columns read from disk for materialized scalar slots in + * the provided tuple. + */ + private int computeNumColumnsReadForCollection(SlotDescriptor collectionSlot) { + Preconditions.checkState(collectionSlot.getType().isCollectionType()); + int numColumns = 0; + for (SlotDescriptor nestedSlot: collectionSlot.getItemTupleDesc().getSlots()) { + // Position virtual slots can be materialized by piggybacking on another slot. + if (!nestedSlot.isMaterialized() || nestedSlot.isArrayPosRef()) continue; + if (nestedSlot.getType().isScalarType()) { + ++numColumns; + } else { + numColumns += computeNumColumnsReadForCollection(nestedSlot); + } + } + // Need to scan at least one column to materialize the pos virtual slot and/or + // determine the size of the nested array. 
+ numColumns = Math.max(numColumns, 1); + return numColumns; } /** @@ -1007,6 +1076,8 @@ public class HdfsScanNode extends ScanNode { } msg.hdfs_scan_node.setRandom_replica(randomReplica_); msg.node_type = TPlanNodeType.HDFS_SCAN_NODE; + Preconditions.checkState(idealScanRangeReservation_ >= 0, idealScanRangeReservation_); + msg.hdfs_scan_node.setIdeal_scan_range_reservation(idealScanRangeReservation_); if (!collectionConjuncts_.isEmpty()) { Map<Integer, List<TExpr>> tcollectionConjuncts = Maps.newLinkedHashMap(); for (Map.Entry<TupleDescriptor, List<Expr>> entry: @@ -1185,26 +1256,21 @@ public class HdfsScanNode extends ScanNode { Preconditions.checkNotNull(scanRanges_, "Cost estimation requires scan ranges."); if (scanRanges_.isEmpty()) { nodeResourceProfile_ = ResourceProfile.noReservation(0); + idealScanRangeReservation_ = 0; return; } Preconditions.checkState(0 < numNodes_ && numNodes_ <= scanRanges_.size()); Preconditions.checkNotNull(desc_); Preconditions.checkNotNull(desc_.getTable() instanceof HdfsTable); HdfsTable table = (HdfsTable) desc_.getTable(); + int numColumnsReadFromFile = computeNumColumnsReadFromFile(); int perHostScanRanges; if (table.getMajorityFormat() == HdfsFileFormat.PARQUET || table.getMajorityFormat() == HdfsFileFormat.ORC) { // For the purpose of this estimation, the number of per-host scan ranges for // Parquet/ORC files are equal to the number of columns read from the file. I.e. // excluding partition columns and columns that are populated from file metadata. 
- perHostScanRanges = 0; - for (SlotDescriptor slot: desc_.getSlots()) { - if (!slot.isMaterialized() || slot == countStarSlot_) continue; - if (slot.getColumn() == null || - slot.getColumn().getPosition() >= table.getNumClusteringCols()) { - ++perHostScanRanges; - } - } + perHostScanRanges = numColumnsReadFromFile; } else { perHostScanRanges = (int) Math.ceil(( (double) scanRanges_.size() / (double) numNodes_) * SCAN_RANGE_SKEW_FACTOR); @@ -1226,12 +1292,13 @@ public class HdfsScanNode extends ScanNode { long avgScanRangeBytes = (long) Math.ceil(totalBytes_ / (double) scanRanges_.size()); // The +1 accounts for an extra I/O buffer to read past the scan range due to a // trailing record spanning Hdfs blocks. - long readSize = BackendConfig.INSTANCE.getReadSize(); + long maxIoBufferSize = + BitUtil.roundUpToPowerOf2(BackendConfig.INSTANCE.getReadSize()); long perThreadIoBuffers = - Math.min((long) Math.ceil(avgScanRangeBytes / (double) readSize), + Math.min((long) Math.ceil(avgScanRangeBytes / (double) maxIoBufferSize), MAX_IO_BUFFERS_PER_THREAD) + 1; long perInstanceMemEstimate = checkedMultiply( - checkedMultiply(maxScannerThreads, perThreadIoBuffers), readSize); + checkedMultiply(maxScannerThreads, perThreadIoBuffers), maxIoBufferSize); // Sanity check: the tighter estimation should not exceed the per-host maximum. 
long perHostUpperBound = getPerHostMemUpperBound(); @@ -1242,7 +1309,51 @@ public class HdfsScanNode extends ScanNode { perInstanceMemEstimate = perHostUpperBound; } perInstanceMemEstimate = Math.max(perInstanceMemEstimate, MIN_MEMORY_ESTIMATE); - nodeResourceProfile_ = ResourceProfile.noReservation(perInstanceMemEstimate); + + Pair<Long, Long> reservation = computeReservation(numColumnsReadFromFile); + nodeResourceProfile_ = new ResourceProfileBuilder() + .setMemEstimateBytes(perInstanceMemEstimate) + .setMinReservationBytes(reservation.first).build(); + idealScanRangeReservation_ = reservation.second; + } + + /* + * Compute the minimum and ideal memory reservation to process a single scan range + * (i.e. hdfs split). Bound the reservation based on: + * - One minimum-sized buffer per IoMgr scan range, which is the absolute minimum + * required to scan the data. + * - A maximum of either 1 or 3 max-sized I/O buffers per IoMgr scan range for + * the minimum and ideal reservation respectively. 1 max-sized I/O buffer avoids + * issuing small I/O unnecessarily while 3 max-sized I/O buffers guarantees higher + * throughput by overlapping compute and I/O efficiently. + * - A maximum reservation of the hdfs split size, to avoid reserving excessive + * memory for small files or ranges, e.g. small dimension tables with very few + * rows. + */ + private Pair<Long, Long> computeReservation(int numColumnsReadFromFile) { + Preconditions.checkState(maxScanRangeBytes_ >= 0); + long maxIoBufferSize = + BitUtil.roundUpToPowerOf2(BackendConfig.INSTANCE.getReadSize()); + // Scanners for columnar formats issue one IoMgr scan range for metadata, followed by + // one IoMgr scan range per column in parallel. Scanners for row-oriented formats + // issue only one IoMgr scan range at a time. + int iomgrScanRangesPerSplit = fileFormats_.contains(HdfsFileFormat.PARQUET) ? + Math.max(1, numColumnsReadFromFile) : 1; + // Need one buffer per IoMgr scan range to execute the scan. 
+ long minReservationToExecute = + iomgrScanRangesPerSplit * BackendConfig.INSTANCE.getMinBufferSize(); + + // Quantize the max scan range (i.e. hdfs split) size to an I/O buffer size. + long quantizedMaxScanRangeBytes = maxScanRangeBytes_ < maxIoBufferSize ? + BitUtil.roundUpToPowerOf2(maxScanRangeBytes_) : + BitUtil.roundUpToPowerOf2Factor(maxScanRangeBytes_, maxIoBufferSize); + long minReservationBytes = Math.max(minReservationToExecute, + Math.min(iomgrScanRangesPerSplit * maxIoBufferSize, + quantizedMaxScanRangeBytes)); + long idealReservationBytes = Math.max(minReservationToExecute, + Math.min(iomgrScanRangesPerSplit * maxIoBufferSize * 3, + quantizedMaxScanRangeBytes)); + return Pair.create(minReservationBytes, idealReservationBytes); } /** http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/fe/src/main/java/org/apache/impala/util/BitUtil.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/util/BitUtil.java b/fe/src/main/java/org/apache/impala/util/BitUtil.java index 839dd6e..6bb670d 100644 --- a/fe/src/main/java/org/apache/impala/util/BitUtil.java +++ b/fe/src/main/java/org/apache/impala/util/BitUtil.java @@ -29,4 +29,10 @@ public class BitUtil { public static long roundUpToPowerOf2(long val) { return 1L << log2Ceiling(val); } + + // Round up 'val' to the nearest multiple of a power-of-two 'factor'. + // 'val' must be > 0. 
+ public static long roundUpToPowerOf2Factor(long val, long factor) { + return (val + (factor - 1)) & ~(factor - 1); + } } http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/fe/src/test/java/org/apache/impala/util/BitUtilTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/util/BitUtilTest.java b/fe/src/test/java/org/apache/impala/util/BitUtilTest.java index a134b6a..a6da80c 100644 --- a/fe/src/test/java/org/apache/impala/util/BitUtilTest.java +++ b/fe/src/test/java/org/apache/impala/util/BitUtilTest.java @@ -46,4 +46,10 @@ public class BitUtilTest { assertEquals(0x8000000000000000L, BitUtil.roundUpToPowerOf2(0x8000000000000000L - 1)); } + @Test + public void testPowerOf2Factor() { + assertEquals(BitUtil.roundUpToPowerOf2Factor(7, 8), 8); + assertEquals(BitUtil.roundUpToPowerOf2Factor(8, 8), 8); + assertEquals(BitUtil.roundUpToPowerOf2Factor(9, 8), 16); + } } http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test index f25ad0a..533ac42 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test @@ -5,7 +5,7 @@ where 5 + 5 < c_custkey and o_orderkey = (2 + 2) and (coalesce(2, 3, 4) * 10) + l_linenumber < (0 * 1) ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=264.00MB mem-reservation=24.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -59,7 +59,7 @@ PLAN-ROOT SINK parquet dictionary predicates: c_custkey > 10 
parquet dictionary predicates on o: o_orderkey = 4 parquet dictionary predicates on o_lineitems: 20 + l_linenumber < 0 - mem-estimate=176.00MB mem-reservation=0B + mem-estimate=264.00MB mem-reservation=24.00MB tuple-ids=0 row-size=24B cardinality=15000 ==== # Test HBase scan node. @@ -107,7 +107,7 @@ having 1024 * 1024 * count(*) % 2 = 0 and (sm between 5 and 10) ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.94MB +| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -125,7 +125,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=20B cardinality=7300 ==== # Test hash join. @@ -136,7 +136,7 @@ left outer join functional.alltypes b where round(1.11 + 2.22 + 3.33 + 4.44, 1) < cast(b.double_col as decimal(3, 2)) ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=257.94MB mem-reservation=1.94MB +| Per-Host Resources: mem-estimate=257.94MB mem-reservation=2.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -157,7 +157,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1 -| mem-estimate=128.00MB mem-reservation=0B +| mem-estimate=128.00MB mem-reservation=32.00KB | tuple-ids=1 row-size=20B cardinality=730 | 00:SCAN HDFS [functional.alltypes a] @@ -167,7 +167,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=8B cardinality=7300 ==== # Test nested-loop join. Same as above but and with a disjunction in the On clause. 
@@ -179,7 +179,7 @@ left outer join functional.alltypes b where cast(b.double_col as decimal(3, 2)) > round(1.11 + 2.22 + 3.33 + 4.44, 1) ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=256.01MB mem-reservation=0B +| Per-Host Resources: mem-estimate=256.01MB mem-reservation=64.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -198,7 +198,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1 -| mem-estimate=128.00MB mem-reservation=0B +| mem-estimate=128.00MB mem-reservation=32.00KB | tuple-ids=1 row-size=20B cardinality=730 | 00:SCAN HDFS [functional.alltypes a] @@ -208,7 +208,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=8B cardinality=7300 ==== # Test distinct aggregation with grouping. @@ -242,7 +242,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=20B cardinality=7300 ==== # Test non-grouping distinct aggregation. @@ -251,7 +251,7 @@ from functional.alltypes having 1024 * 1024 * count(*) % 2 = 0 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.94MB +| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -274,7 +274,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=4B cardinality=7300 ==== # Test analytic eval node. 
@@ -284,7 +284,7 @@ select first_value(1 + 1 + int_col - (1 - 1)) over from functional.alltypes ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.00MB +| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.03MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -309,7 +309,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=29B cardinality=7300 ==== # Test sort node. @@ -317,7 +317,7 @@ select int_col from functional.alltypes order by id * abs((factorial(5) / power(2, 4))) ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=134.00MB mem-reservation=6.00MB +| Per-Host Resources: mem-estimate=134.00MB mem-reservation=6.03MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -333,7 +333,7 @@ PLAN-ROOT SINK partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=8B cardinality=7300 ==== # Test HDFS table sink. 
@@ -342,7 +342,7 @@ select id, int_col, cast(1 + 1 + 1 + year as int), cast(month - (1 - 1 - 1) as i from functional.alltypessmall ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=8.00KB WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(CAST(3 + year AS INT),CAST(month - -1 AS INT))] | partitions=4 | mem-estimate=1.56KB mem-reservation=0B @@ -354,7 +354,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(CAST(3 + ye partitions: 4/4 rows=100 columns: all extrapolated-rows=disabled - mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=8.00KB tuple-ids=0 row-size=16B cardinality=100 ==== # Constant folding does not work across query blocks. @@ -366,7 +366,7 @@ select sum(id + c3) from ) v3 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=138.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -383,6 +383,6 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled limit: 2 - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=32.00KB tuple-ids=0 row-size=4B cardinality=2 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test index c5081a7..3d234d1 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test @@ -1,7 +1,7 @@ # Rows per node is < 3000: 
codegen should be disabled. select count(*) from functional.alltypes ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=32.00KB Per-Host Resource Estimates: Memory=148.00MB Codegen disabled by planner @@ -21,7 +21,7 @@ PLAN-ROOT SINK # Rows per node is > 3000: codegen should be enabled. select count(*) from functional.alltypesagg ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=128.00KB Per-Host Resource Estimates: Memory=100.00MB PLAN-ROOT SINK @@ -40,7 +40,7 @@ PLAN-ROOT SINK # No stats on functional_parquet: codegen should be disabled. select count(*) from functional_parquet.alltypes ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=8.00KB Per-Host Resource Estimates: Memory=21.00MB WARNING: The following tables are missing relevant table and/or column statistics. functional_parquet.alltypes @@ -61,7 +61,7 @@ PLAN-ROOT SINK # > 3000 rows returned to coordinator: codegen should be enabled select * from functional_parquet.alltypes ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=88.00KB Per-Host Resource Estimates: Memory=128.00MB WARNING: The following tables are missing relevant table and/or column statistics. 
functional_parquet.alltypes @@ -78,7 +78,7 @@ select count(*) from functional.alltypes t1 join functional.alltypestiny t2 on t1.id = t2.id ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=2.94MB +Max Per-Host Resource Reservation: Memory=2.98MB Per-Host Resource Estimates: Memory=182.94MB Codegen disabled by planner @@ -108,7 +108,7 @@ PLAN-ROOT SINK # Optimisation is disabled by cross join producing > 3000 rows select count(*) from functional.alltypes t1, functional.alltypes t2 ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=64.00KB Per-Host Resource Estimates: Memory=276.00MB PLAN-ROOT SINK @@ -137,7 +137,7 @@ select count(*) from ( union all select * from functional.alltypestiny) v ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=32.00KB Per-Host Resource Estimates: Memory=148.00MB Codegen disabled by planner @@ -166,7 +166,7 @@ select count(*) from ( union all select * from functional.alltypes) v ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=32.00KB Per-Host Resource Estimates: Memory=148.00MB PLAN-ROOT SINK @@ -193,7 +193,7 @@ PLAN-ROOT SINK select sum(l_discount) from (select * from tpch.lineitem limit 1000) v ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=8.00MB Per-Host Resource Estimates: Memory=274.00MB Codegen disabled by planner @@ -214,7 +214,7 @@ PLAN-ROOT SINK select sum(l_discount) from (select * from tpch.lineitem where l_orderkey > 100 limit 1000) v ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=0B +Max Per-Host Resource Reservation: Memory=8.00MB Per-Host Resource Estimates: Memory=274.00MB PLAN-ROOT SINK http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test 
---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test index bb12bca..55439d6 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test @@ -5,7 +5,7 @@ on ss_customer_sk = c_customer_sk where c_salutation = 'Mrs.' ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=185.50MB mem-reservation=9.50MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=185.50MB mem-reservation=25.50MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -24,7 +24,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: c_salutation = 'Mrs.' -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=255B cardinality=16667 | 00:SCAN HDFS [tpcds.store_sales] @@ -35,7 +35,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # Single-column FK/PK join detection on left outer join. The join cardinality @@ -46,7 +46,7 @@ on ss_customer_sk = c_customer_sk where c_salutation = 'Mrs.' ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=184.50MB mem-reservation=8.50MB +| Per-Host Resources: mem-estimate=184.50MB mem-reservation=24.50MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -65,7 +65,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: c_salutation = 'Mrs.' 
-| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=255B cardinality=16667 | 00:SCAN HDFS [tpcds.store_sales] @@ -75,7 +75,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # Single-column FK/PK join detection on right outer join. The join cardinality @@ -86,7 +86,7 @@ on ss_customer_sk = c_customer_sk where c_salutation = 'Mrs.' ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=185.50MB mem-reservation=9.50MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=185.50MB mem-reservation=25.50MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -105,7 +105,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: c_salutation = 'Mrs.' 
-| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=255B cardinality=16667 | 00:SCAN HDFS [tpcds.store_sales] @@ -116,7 +116,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # Multi-column FK/PK join detection @@ -126,7 +126,7 @@ on ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number where sr_return_quantity < 10 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=214.75MB mem-reservation=6.75MB runtime-filters-memory=2.00MB +| Per-Host Resources: mem-estimate=214.75MB mem-reservation=22.75MB runtime-filters-memory=2.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -145,7 +145,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: sr_return_quantity < 10 -| mem-estimate=80.00MB mem-reservation=0B +| mem-estimate=80.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=88B cardinality=28751 | 00:SCAN HDFS [tpcds.store_sales] @@ -156,7 +156,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # Many-to-many join detection. 
@@ -165,7 +165,7 @@ tpcds.store_sales inner join tpcds.web_sales on ss_sold_time_sk = ws_sold_time_sk ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=397.67MB mem-reservation=35.00MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=397.67MB mem-reservation=51.00MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -182,7 +182,7 @@ PLAN-ROOT SINK | table: rows=719384 size=140.07MB | columns: all | extrapolated-rows=disabled -| mem-estimate=160.00MB mem-reservation=0B +| mem-estimate=160.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=144B cardinality=719384 | 00:SCAN HDFS [tpcds.store_sales] @@ -193,7 +193,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # PK/PK join is detected as FK/PK. @@ -203,7 +203,7 @@ on a.d_date_sk = b.d_date_sk where a.d_holiday = "Y" ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=114.00MB mem-reservation=18.00MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -222,7 +222,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled | parquet dictionary predicates: a.d_holiday = 'Y' -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=0 row-size=303B cardinality=36525 | 01:SCAN HDFS [tpcds.date_dim b] @@ -232,7 +232,7 @@ PLAN-ROOT SINK table: rows=73049 size=9.84MB columns: all extrapolated-rows=disabled - mem-estimate=48.00MB mem-reservation=0B + mem-estimate=48.00MB mem-reservation=8.00MB tuple-ids=1 row-size=303B cardinality=73049 ==== # Single query with various join types combined. 
@@ -246,7 +246,7 @@ where ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and d1.d_fy_week_seq = 1000 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=362.81MB mem-reservation=12.75MB runtime-filters-memory=5.00MB +| Per-Host Resources: mem-estimate=362.81MB mem-reservation=50.81MB runtime-filters-memory=5.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -263,7 +263,7 @@ PLAN-ROOT SINK | table: rows=100000 size=12.60MB | columns: all | extrapolated-rows=disabled -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=2 row-size=4B cardinality=100000 | 07:HASH JOIN [INNER JOIN] @@ -279,7 +279,7 @@ PLAN-ROOT SINK | table: rows=73049 size=9.84MB | columns: all | extrapolated-rows=disabled -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=4 row-size=4B cardinality=73049 | 06:HASH JOIN [INNER JOIN] @@ -304,7 +304,7 @@ PLAN-ROOT SINK | | columns: all | | extrapolated-rows=disabled | | parquet dictionary predicates: d1.d_fy_week_seq = 1000 -| | mem-estimate=48.00MB mem-reservation=0B +| | mem-estimate=48.00MB mem-reservation=8.00MB | | tuple-ids=3 row-size=8B cardinality=7 | | | 00:SCAN HDFS [tpcds.store_sales] @@ -315,7 +315,7 @@ PLAN-ROOT SINK | partitions: 1824/1824 rows=2880404 | columns: all | extrapolated-rows=disabled -| mem-estimate=128.00MB mem-reservation=0B +| mem-estimate=128.00MB mem-reservation=8.00MB | tuple-ids=0 row-size=24B cardinality=2880404 | 01:SCAN HDFS [tpcds.store_returns] @@ -325,7 +325,7 @@ PLAN-ROOT SINK table: rows=287514 size=31.19MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=8.00MB tuple-ids=1 row-size=20B cardinality=287514 ==== # Assumed FK/PK join because of non-trivial equi-join exprs. 
@@ -334,7 +334,7 @@ tpcds.store_sales inner join tpcds.customer on ss_customer_sk % 10 = c_customer_sk / 100 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=211.00MB mem-reservation=35.00MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=211.00MB mem-reservation=51.00MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -351,7 +351,7 @@ PLAN-ROOT SINK | table: rows=100000 size=12.60MB | columns: all | extrapolated-rows=disabled -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=255B cardinality=100000 | 00:SCAN HDFS [tpcds.store_sales] @@ -362,7 +362,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== # Assumed FK/PK join due to missing stats on the rhs. 
Join cardinality is equal to @@ -372,7 +372,7 @@ tpcds.store_sales inner join tpcds_seq_snap.customer on ss_customer_sk = c_customer_sk ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=2.17GB mem-reservation=35.00MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=2.17GB mem-reservation=51.00MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -384,12 +384,12 @@ PLAN-ROOT SINK | tuple-ids=0,1 row-size=8B cardinality=2880404 | |--01:SCAN HDFS [tpcds_seq_snap.customer] -| partitions=1/1 files=1 size=8.59MB +| partitions=1/1 files=1 size=8.58MB | stored statistics: -| table: rows=unavailable size=8.59MB +| table: rows=unavailable size=8.58MB | columns: unavailable | extrapolated-rows=disabled -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=4B cardinality=unavailable | 00:SCAN HDFS [tpcds.store_sales] @@ -400,7 +400,7 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=4B cardinality=2880404 ==== # Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown. 
@@ -409,7 +409,7 @@ tpcds_seq_snap.store_sales inner join tpcds.customer on ss_customer_sk = c_customer_sk ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=178.94MB mem-reservation=2.94MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=178.94MB mem-reservation=18.94MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -426,18 +426,18 @@ PLAN-ROOT SINK | table: rows=100000 size=12.60MB | columns: all | extrapolated-rows=disabled -| mem-estimate=48.00MB mem-reservation=0B +| mem-estimate=48.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=4B cardinality=100000 | 00:SCAN HDFS [tpcds_seq_snap.store_sales] - partitions=1824/1824 files=1824 size=207.90MB + partitions=1824/1824 files=1824 size=207.85MB runtime filters: RF000[bloom] -> ss_customer_sk stored statistics: table: rows=unavailable size=unavailable partitions: 0/1824 rows=unavailable columns: unavailable extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=4B cardinality=unavailable ==== # Join is detected as many-to-many even though the rhs join columns @@ -448,7 +448,7 @@ tpcds.store_sales inner join on ss_sold_time_sk = ws_sold_time_sk ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=299.00MB mem-reservation=4.88MB runtime-filters-memory=1.00MB +| Per-Host Resources: mem-estimate=299.00MB mem-reservation=18.94MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -470,7 +470,7 @@ PLAN-ROOT SINK | table: rows=719384 size=140.07MB | columns: all | extrapolated-rows=disabled -| mem-estimate=160.00MB mem-reservation=0B +| mem-estimate=160.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=4B cardinality=719384 | 00:SCAN HDFS [tpcds.store_sales] @@ -481,6 +481,6 @@ PLAN-ROOT SINK partitions: 1824/1824 rows=2880404 columns: all 
extrapolated-rows=disabled - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=8.00MB tuple-ids=0 row-size=100B cardinality=2880404 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test index 96015e0..1df9270 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test @@ -4,7 +4,7 @@ select straight_join * from tpch_parquet.customer inner join tpch_parquet.nation on c_nationkey = n_nationkey ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=17.94MB +Max Per-Host Resource Reservation: Memory=33.97MB Per-Host Resource Estimates: Memory=57.94MB F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -17,7 +17,7 @@ PLAN-ROOT SINK | tuple-ids=0,1 row-size=355B cardinality=150000 | F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 -Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters-memory=1.00MB +Per-Host Resources: mem-estimate=41.94MB mem-reservation=33.94MB runtime-filters-memory=1.00MB 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: c_nationkey = n_nationkey | fk/pk conjuncts: c_nationkey = n_nationkey @@ -30,14 +30,14 @@ Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters | | tuple-ids=1 row-size=117B cardinality=25 | | | F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=16.00MB mem-reservation=32.00KB | 01:SCAN HDFS [tpch_parquet.nation, RANDOM] | partitions=1/1 files=1 size=2.74KB | stored statistics: | table: rows=25 size=2.74KB | 
columns: all | extrapolated-rows=disabled -| mem-estimate=16.00MB mem-reservation=0B +| mem-estimate=16.00MB mem-reservation=32.00KB | tuple-ids=1 row-size=117B cardinality=25 | 00:SCAN HDFS [tpch_parquet.customer, RANDOM] @@ -47,7 +47,7 @@ Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters table: rows=150000 size=12.31MB columns: all extrapolated-rows=disabled - mem-estimate=24.00MB mem-reservation=0B + mem-estimate=24.00MB mem-reservation=16.00MB tuple-ids=0 row-size=238B cardinality=150000 ==== # Join with large build side. @@ -56,8 +56,8 @@ select straight_join * from tpch_parquet.lineitem left join tpch_parquet.orders on l_orderkey = o_orderkey ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=46.00MB -Per-Host Resource Estimates: Memory=420.41MB +Max Per-Host Resource Reservation: Memory=166.00MB +Per-Host Resource Estimates: Memory=428.41MB F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=0B mem-reservation=0B @@ -69,7 +69,7 @@ PLAN-ROOT SINK | tuple-ids=0,1N row-size=454B cardinality=6001215 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB +Per-Host Resources: mem-estimate=380.41MB mem-reservation=118.00MB 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: l_orderkey = o_orderkey | fk/pk conjuncts: l_orderkey = o_orderkey @@ -81,23 +81,23 @@ Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB | | tuple-ids=1 row-size=191B cardinality=1500000 | | | F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 -| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=48.00MB mem-reservation=48.00MB | 01:SCAN HDFS [tpch_parquet.orders, RANDOM] | partitions=1/1 files=2 size=54.07MB | stored statistics: | table: rows=1500000 size=54.07MB | columns: all | extrapolated-rows=disabled -| mem-estimate=40.00MB mem-reservation=0B +| mem-estimate=48.00MB 
mem-reservation=48.00MB | tuple-ids=1 row-size=191B cardinality=1500000 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.73MB + partitions=1/1 files=3 size=193.72MB stored statistics: - table: rows=6001215 size=193.73MB + table: rows=6001215 size=193.72MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=72.00MB tuple-ids=0 row-size=263B cardinality=6001215 ==== # Null-aware anti-join with medium build side. @@ -105,7 +105,7 @@ Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB select * from tpch_parquet.lineitem where l_orderkey not in (select o_orderkey from tpch_parquet.orders) ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=34.00MB +Max Per-Host Resource Reservation: Memory=114.00MB Per-Host Resource Estimates: Memory=154.00MB F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -118,7 +118,7 @@ PLAN-ROOT SINK | tuple-ids=0 row-size=263B cardinality=6001215 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB +Per-Host Resources: mem-estimate=114.00MB mem-reservation=106.00MB 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST] | hash predicates: l_orderkey = o_orderkey | mem-estimate=34.00MB mem-reservation=34.00MB spill-buffer=1.00MB @@ -129,23 +129,23 @@ Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB | | tuple-ids=1 row-size=8B cardinality=1500000 | | | F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 -| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=40.00MB mem-reservation=8.00MB | 01:SCAN HDFS [tpch_parquet.orders, RANDOM] | partitions=1/1 files=2 size=54.07MB | stored statistics: | table: rows=1500000 size=54.07MB | columns: all | extrapolated-rows=disabled -| mem-estimate=40.00MB mem-reservation=0B +| mem-estimate=40.00MB mem-reservation=8.00MB | tuple-ids=1 row-size=8B cardinality=1500000 | 
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.73MB + partitions=1/1 files=3 size=193.72MB stored statistics: - table: rows=6001215 size=193.73MB + table: rows=6001215 size=193.72MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=72.00MB tuple-ids=0 row-size=263B cardinality=6001215 ==== # Mid NDV aggregation. @@ -156,7 +156,7 @@ from tpch_parquet.lineitem group by 1, 2 having count(*) = 1 ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=113.00MB +Max Per-Host Resource Reservation: Memory=137.00MB Per-Host Resource Estimates: Memory=253.12MB F04:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -201,14 +201,14 @@ Per-Host Resources: mem-estimate=86.12MB mem-reservation=66.00MB runtime-filters | | tuple-ids=1 row-size=25B cardinality=1500000 | | | F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 -| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=40.00MB mem-reservation=16.00MB | 01:SCAN HDFS [tpch_parquet.orders, RANDOM] | partitions=1/1 files=2 size=54.07MB | stored statistics: | table: rows=1500000 size=54.07MB | columns: all | extrapolated-rows=disabled -| mem-estimate=40.00MB mem-reservation=0B +| mem-estimate=40.00MB mem-reservation=16.00MB | tuple-ids=1 row-size=25B cardinality=1500000 | 04:EXCHANGE [HASH(l_orderkey)] @@ -216,15 +216,15 @@ Per-Host Resources: mem-estimate=86.12MB mem-reservation=66.00MB runtime-filters | tuple-ids=0 row-size=8B cardinality=6001215 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=81.00MB mem-reservation=1.00MB runtime-filters-memory=1.00MB +Per-Host Resources: mem-estimate=81.00MB mem-reservation=9.00MB runtime-filters-memory=1.00MB 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.73MB + partitions=1/1 files=3 size=193.72MB runtime filters: RF000[bloom] -> l_orderkey stored statistics: - table: 
rows=6001215 size=193.73MB + table: rows=6001215 size=193.72MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=8.00MB tuple-ids=0 row-size=8B cardinality=6001215 ==== # High NDV aggregation. @@ -232,7 +232,7 @@ Per-Host Resources: mem-estimate=81.00MB mem-reservation=1.00MB runtime-filters- select distinct * from tpch_parquet.lineitem ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=80.00MB +Max Per-Host Resource Reservation: Memory=152.00MB Per-Host Resource Estimates: Memory=3.31GB F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -256,19 +256,19 @@ Per-Host Resources: mem-estimate=1.62GB mem-reservation=46.00MB | tuple-ids=1 row-size=263B cardinality=6001215 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=1.69GB mem-reservation=34.00MB +Per-Host Resources: mem-estimate=1.69GB mem-reservation=106.00MB 01:AGGREGATE [STREAMING] | group by: tpch_parquet.lineitem.l_orderkey, tpch_parquet.lineitem.l_partkey, tpch_parquet.lineitem.l_suppkey, tpch_parquet.lineitem.l_linenumber, tpch_parquet.lineitem.l_quantity, tpch_parquet.lineitem.l_extendedprice, tpch_parquet.lineitem.l_discount, tpch_parquet.lineitem.l_tax, tpch_parquet.lineitem.l_returnflag, tpch_parquet.lineitem.l_linestatus, tpch_parquet.lineitem.l_shipdate, tpch_parquet.lineitem.l_commitdate, tpch_parquet.lineitem.l_receiptdate, tpch_parquet.lineitem.l_shipinstruct, tpch_parquet.lineitem.l_shipmode, tpch_parquet.lineitem.l_comment | mem-estimate=1.62GB mem-reservation=34.00MB spill-buffer=2.00MB | tuple-ids=1 row-size=263B cardinality=6001215 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.73MB + partitions=1/1 files=3 size=193.72MB stored statistics: - table: rows=6001215 size=193.73MB + table: rows=6001215 size=193.72MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=72.00MB 
tuple-ids=0 row-size=263B cardinality=6001215 ==== # High NDV aggregation with string aggregation function. @@ -277,7 +277,7 @@ select l_orderkey, l_partkey, group_concat(l_linestatus, ",") from tpch_parquet.lineitem group by 1, 2 ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=82.00MB +Max Per-Host Resource Reservation: Memory=106.00MB Per-Host Resource Estimates: Memory=482.91MB F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -302,7 +302,7 @@ Per-Host Resources: mem-estimate=201.46MB mem-reservation=48.00MB | tuple-ids=1 row-size=32B cardinality=6001215 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB +Per-Host Resources: mem-estimate=281.46MB mem-reservation=58.00MB 01:AGGREGATE [STREAMING] | output: group_concat(l_linestatus, ',') | group by: l_orderkey, l_partkey @@ -310,12 +310,12 @@ Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB | tuple-ids=1 row-size=32B cardinality=6001215 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.73MB + partitions=1/1 files=3 size=193.72MB stored statistics: - table: rows=6001215 size=193.73MB + table: rows=6001215 size=193.72MB columns: all extrapolated-rows=disabled - mem-estimate=80.00MB mem-reservation=0B + mem-estimate=80.00MB mem-reservation=24.00MB tuple-ids=0 row-size=33B cardinality=6001215 ==== # Sort + Analytic. 
@@ -323,7 +323,7 @@ Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB select max(tinyint_col) over(partition by int_col) from functional.alltypes ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=40.00MB +Max Per-Host Resource Reservation: Memory=40.03MB Per-Host Resource Estimates: Memory=56.00MB Codegen disabled by planner @@ -354,7 +354,7 @@ Per-Host Resources: mem-estimate=40.00MB mem-reservation=40.00MB | tuple-ids=0 row-size=5B cardinality=7300 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 -Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B +Per-Host Resources: mem-estimate=16.00MB mem-reservation=32.00KB 00:SCAN HDFS [functional.alltypes, RANDOM] partitions=24/24 files=24 size=478.45KB stored statistics: @@ -362,6 +362,6 @@ Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B partitions: 24/24 rows=7300 columns: all extrapolated-rows=disabled - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=32.00KB tuple-ids=0 row-size=5B cardinality=7300 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test b/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test index 7f6d96b..8dd2593 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test @@ -99,7 +99,7 @@ select count(*) from functional_kudu.alltypes a, functional_parquet.alltypes b, where a.int_col = b.int_col and a.int_col = c.int_col ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=2.02GB mem-reservation=36.94MB runtime-filters-memory=1.00MB +| Per-Host 
Resources: mem-estimate=2.02GB mem-reservation=36.95MB runtime-filters-memory=1.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -127,14 +127,14 @@ PLAN-ROOT SINK | tuple-ids=0,1 row-size=8B cardinality=7300 | |--01:SCAN HDFS [functional_parquet.alltypes b] -| partitions=24/24 files=24 size=174.39KB +| partitions=24/24 files=24 size=174.62KB | runtime filters: RF000[bloom] -> b.int_col | stored statistics: | table: rows=unavailable size=unavailable | partitions: 0/24 rows=unavailable | columns: unavailable | extrapolated-rows=disabled -| mem-estimate=16.00MB mem-reservation=0B +| mem-estimate=16.00MB mem-reservation=8.00KB | tuple-ids=1 row-size=4B cardinality=unavailable | 00:SCAN KUDU [functional_kudu.alltypes a] http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test index 0bcb1a9..b741019 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test @@ -40,7 +40,7 @@ order by cnt, bigint_col limit 10 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=144.00MB mem-reservation=34.00MB +| Per-Host Resources: mem-estimate=144.00MB mem-reservation=34.02MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -56,7 +56,7 @@ PLAN-ROOT SINK | tuple-ids=1 row-size=16B cardinality=unavailable | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: id < 10 stored statistics: table: rows=unavailable size=unavailable @@ -65,7 +65,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet 
statistics predicates: id < 10 parquet dictionary predicates: id < 10 - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=24.00KB tuple-ids=0 row-size=16B cardinality=unavailable ---- PARALLELPLANS F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -97,7 +97,7 @@ Per-Host Resources: mem-estimate=384.00MB mem-reservation=102.00MB | tuple-ids=1 row-size=16B cardinality=unavailable | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9 -Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB +Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.07MB 01:AGGREGATE [STREAMING] | output: count(int_col) | group by: bigint_col @@ -105,7 +105,7 @@ Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB | tuple-ids=1 row-size=16B cardinality=unavailable | 00:SCAN HDFS [functional_parquet.alltypes, RANDOM] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: id < 10 stored statistics: table: rows=unavailable size=unavailable @@ -114,7 +114,7 @@ Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB extrapolated-rows=disabled parquet statistics predicates: id < 10 parquet dictionary predicates: id < 10 - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=24.00KB tuple-ids=0 row-size=16B cardinality=unavailable ==== # Single-table scan/filter/analytic should work. 
@@ -123,7 +123,7 @@ from functional_parquet.alltypes where id < 10 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=26.00MB mem-reservation=10.00MB +| Per-Host Resources: mem-estimate=26.00MB mem-reservation=10.02MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -141,7 +141,7 @@ PLAN-ROOT SINK | tuple-ids=4 row-size=8B cardinality=unavailable | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: id < 10 stored statistics: table: rows=unavailable size=unavailable @@ -150,7 +150,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates: id < 10 parquet dictionary predicates: id < 10 - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=16.00KB tuple-ids=0 row-size=8B cardinality=unavailable ---- PARALLELPLANS F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -182,9 +182,9 @@ Per-Host Resources: mem-estimate=30.00MB mem-reservation=30.00MB | tuple-ids=0 row-size=8B cardinality=unavailable | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9 -Per-Host Resources: mem-estimate=48.00MB mem-reservation=0B +Per-Host Resources: mem-estimate=48.00MB mem-reservation=48.00KB 00:SCAN HDFS [functional_parquet.alltypes, RANDOM] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: id < 10 stored statistics: table: rows=unavailable size=unavailable @@ -193,7 +193,7 @@ Per-Host Resources: mem-estimate=48.00MB mem-reservation=0B extrapolated-rows=disabled parquet statistics predicates: id < 10 parquet dictionary predicates: id < 10 - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=16.00KB tuple-ids=0 row-size=8B cardinality=unavailable ==== # Nested-loop join in a subplan should work. 
@@ -202,7 +202,7 @@ from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems where c_custkey < 10 and o_orderkey < 5 and l_linenumber < 3 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=88.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=88.00MB mem-reservation=88.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -257,7 +257,7 @@ PLAN-ROOT SINK parquet dictionary predicates: c_custkey < 10 parquet dictionary predicates on o: o_orderkey < 5 parquet dictionary predicates on o_lineitems: l_linenumber < 3 - mem-estimate=88.00MB mem-reservation=0B + mem-estimate=88.00MB mem-reservation=88.00MB tuple-ids=0 row-size=254B cardinality=15000 ---- PARALLELPLANS F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -270,7 +270,7 @@ PLAN-ROOT SINK | tuple-ids=2,1,0 row-size=562B cardinality=1500000 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9 -Per-Host Resources: mem-estimate=264.00MB mem-reservation=0B +Per-Host Resources: mem-estimate=264.00MB mem-reservation=264.00MB 01:SUBPLAN | mem-estimate=0B mem-reservation=0B | tuple-ids=2,1,0 row-size=562B cardinality=1500000 @@ -322,7 +322,7 @@ Per-Host Resources: mem-estimate=264.00MB mem-reservation=0B parquet dictionary predicates: c_custkey < 10 parquet dictionary predicates on o: o_orderkey < 5 parquet dictionary predicates on o_lineitems: l_linenumber < 3 - mem-estimate=88.00MB mem-reservation=0B + mem-estimate=88.00MB mem-reservation=88.00MB tuple-ids=0 row-size=254B cardinality=15000 ==== # Hash-join in a subplan should work. 
@@ -331,7 +331,7 @@ from tpch_nested_parquet.customer c, c.c_orders o1, c.c_orders o2 where o1.o_orderkey = o2.o_orderkey + 2 and o1.o_orderkey < 5 ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=89.94MB mem-reservation=1.94MB +| Per-Host Resources: mem-estimate=89.94MB mem-reservation=81.94MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -374,7 +374,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates on o1: o1.o_orderkey < 5 parquet dictionary predicates on o1: o1.o_orderkey < 5 - mem-estimate=88.00MB mem-reservation=0B + mem-estimate=88.00MB mem-reservation=80.00MB tuple-ids=0 row-size=270B cardinality=150000 ---- PARALLELPLANS F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 @@ -387,7 +387,7 @@ PLAN-ROOT SINK | tuple-ids=1,0,2 row-size=286B cardinality=1500000 | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9 -Per-Host Resources: mem-estimate=269.81MB mem-reservation=5.81MB +Per-Host Resources: mem-estimate=269.81MB mem-reservation=245.81MB 01:SUBPLAN | mem-estimate=0B mem-reservation=0B | tuple-ids=1,0,2 row-size=286B cardinality=1500000 @@ -427,6 +427,6 @@ Per-Host Resources: mem-estimate=269.81MB mem-reservation=5.81MB extrapolated-rows=disabled parquet statistics predicates on o1: o1.o_orderkey < 5 parquet dictionary predicates on o1: o1.o_orderkey < 5 - mem-estimate=88.00MB mem-reservation=0B + mem-estimate=88.00MB mem-reservation=80.00MB tuple-ids=0 row-size=270B cardinality=150000 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test index 2b602c9..3e812c6 100644 --- 
a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test @@ -8,7 +8,7 @@ where int_col > 1 and int_col * rand() > 50 and int_col is null and int_col > tinyint_col; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=42.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -18,7 +18,7 @@ PLAN-ROOT SINK | tuple-ids=1 row-size=8B cardinality=1 | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50 stored statistics: table: rows=unavailable size=unavailable @@ -27,7 +27,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates: int_col > 1 parquet dictionary predicates: int_col > 1 - mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=16.00KB tuple-ids=0 row-size=5B cardinality=unavailable ==== # Test a variety of types @@ -40,7 +40,7 @@ and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1 and year > 2000 and month < 12; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=138.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -50,7 +50,7 @@ PLAN-ROOT SINK | tuple-ids=1 row-size=8B cardinality=1 | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=22/24 files=22 size=164.09KB + partitions=22/24 files=22 size=159.69KB predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP 
'2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01' stored statistics: table: rows=unavailable size=unavailable @@ -59,7 +59,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), date_string_col > '1993-10-01' parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01' - mem-estimate=128.00MB mem-reservation=0B + mem-estimate=128.00MB mem-reservation=88.00KB tuple-ids=0 row-size=80B cardinality=unavailable ==== # Test negative cases for IN predicate min/max filtering @@ -73,7 +73,7 @@ and mod(int_col,50) IN (0,1) and id IN (int_col); ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=58.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -83,7 +83,7 @@ PLAN-ROOT SINK | tuple-ids=1 row-size=8B cardinality=1 | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=179.19KB + partitions=24/24 files=24 size=174.39KB predicates: id IN (int_col), id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1) stored statistics: table: rows=unavailable size=unavailable @@ -91,7 +91,7 @@ PLAN-ROOT SINK columns: unavailable extrapolated-rows=disabled parquet dictionary predicates: id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1) - mem-estimate=48.00MB mem-reservation=0B + mem-estimate=48.00MB mem-reservation=24.00KB tuple-ids=0 row-size=24B cardinality=unavailable ==== # Test collection types where 
all collections on the path are required (inner @@ -101,7 +101,7 @@ select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn. where a.item.e < -10; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -152,7 +152,7 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates on a: a.item.e < -10 parquet dictionary predicates on a: a.item.e < -10 - mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=16.00KB tuple-ids=0 row-size=24B cardinality=unavailable ==== # Test collection types where the lower collection in the path is optional @@ -164,7 +164,7 @@ left outer join cn.item a where a.item.e < -10; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -213,7 +213,7 @@ PLAN-ROOT SINK table: rows=unavailable size=unavailable columns missing stats: id extrapolated-rows=disabled - mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=16.00KB tuple-ids=0 row-size=24B cardinality=unavailable ==== # Tests collection types where the outer is optional (outer join descent) @@ -223,7 +223,7 @@ select id from functional_parquet.complextypestbl c left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -270,7 +270,7 @@ PLAN-ROOT SINK table: rows=unavailable size=unavailable columns missing stats: id extrapolated-rows=disabled - 
mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=16.00KB tuple-ids=0 row-size=24B cardinality=unavailable ==== # Test collections so that each level has a filter applied. @@ -278,7 +278,7 @@ select c_custkey from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=264.00MB mem-reservation=24.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -333,7 +333,7 @@ PLAN-ROOT SINK parquet dictionary predicates: c_custkey > 0 parquet dictionary predicates on o: o.o_orderkey > 0 parquet dictionary predicates on l: l.l_partkey > 0 - mem-estimate=176.00MB mem-reservation=0B + mem-estimate=264.00MB mem-reservation=24.00MB tuple-ids=0 row-size=24B cardinality=15000 ==== # Test collections in a way that would incorrectly apply a min-max @@ -342,7 +342,7 @@ select count(*) from functional_parquet.complextypestbl c left outer join (select * from c.int_array where item > 10) v; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=26.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=26.00MB mem-reservation=8.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -376,7 +376,7 @@ PLAN-ROOT SINK table: rows=unavailable size=unavailable columns: unavailable extrapolated-rows=disabled - mem-estimate=16.00MB mem-reservation=0B + mem-estimate=16.00MB mem-reservation=8.00KB tuple-ids=0 row-size=16B cardinality=unavailable ==== # Multiple nested collection values (at the same nesting level) where dictionary @@ -388,7 +388,7 @@ l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R l.l_comment is null; ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B +| 
Per-Host Resources: mem-estimate=616.00MB mem-reservation=56.00MB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -439,6 +439,6 @@ PLAN-ROOT SINK extrapolated-rows=disabled parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R' parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R' - mem-estimate=176.00MB mem-reservation=0B + mem-estimate=616.00MB mem-reservation=56.00MB tuple-ids=0 row-size=50B cardinality=150000 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/9bf324e7/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test index 610136d..5cbba7b 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test @@ -4,7 +4,7 @@ select * from functional.stringpartitionkey where string_col=cast("2009-01-01 00:00:00" as timestamp); ---- PLAN F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=8.00KB PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | @@ -15,6 +15,6 @@ PLAN-ROOT SINK partitions: 1/1 rows=1 columns: all extrapolated-rows=disabled - mem-estimate=32.00MB mem-reservation=0B + mem-estimate=32.00MB mem-reservation=8.00KB tuple-ids=0 row-size=20B cardinality=1 ====
