This is an automated email from the ASF dual-hosted git repository. tarmstrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new c3a67b6 IMPALA-8849: fix IllegalStateException with VARCHAR c3a67b6 is described below commit c3a67b67faaebf18735fa36d35c80d8c11043f7f Author: Tim Armstrong <tarmstr...@cloudera.com> AuthorDate: Fri Aug 9 17:56:00 2019 -0700 IMPALA-8849: fix IllegalStateException with VARCHAR The bug is that the serialized size wasn't populated for VARCHAR in a case when it should have been. It appears a condition was simply not updated when VARCHAR was added. Other code assumed that the serialized size was populated when the other size field was populated, which is a reasonable invariant. I documented the invariant in the class and added validation that the invariant held. Defining and checking invariants led to discovering various other minor issues where the sizes were set incorrectly for fixed-length types or not set for variable-length types: * CHAR was not consistently treated as a fixed-length type. * avgSerializedSize_ was not always updated with avgSize_. Testing: Added a regression test for this specific case. Adding the assertions also exposed related bugs in other cases. 
Change-Id: Ie45e386cb09e31f4b7cdc82b7734dbecb4464534 Reviewed-on: http://gerrit.cloudera.org:8080/14062 Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Reviewed-by: Csaba Ringhofer <csringho...@cloudera.com> --- .../impala/analysis/AlterTableSetColumnStats.java | 4 +- .../org/apache/impala/catalog/ColumnStats.java | 81 +++++++++++++++++++--- .../org/apache/impala/planner/PlannerTest.java | 20 +++--- testdata/bin/compute-table-stats.sh | 2 +- .../queries/PlannerTest/card-scan.test | 70 +++++++++++-------- .../queries/PlannerTest/empty.test | 48 ++++++------- .../QueryTest/compute-stats-incremental.test | 6 +- .../queries/QueryTest/compute-stats.test | 2 +- 8 files changed, 154 insertions(+), 79 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java index b68916b..6a3fcee 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java @@ -123,7 +123,7 @@ public class AlterTableSetColumnStats extends AlterTableStmt { "Expected a positive integer or -1 for unknown.", statsValue, statsKey)); } - stats.update(statsKey, statsVal); + stats.update(col.getType(), statsKey, statsVal); } else if (statsKey == ColumnStats.StatsKey.AVG_SIZE) { Float statsVal = null; try { @@ -137,7 +137,7 @@ public class AlterTableSetColumnStats extends AlterTableStmt { "Expected a positive floating-point number or -1 for unknown.", statsValue, statsKey)); } - stats.update(statsKey, statsVal); + stats.update(col.getType(), statsKey, statsVal); } else { Preconditions.checkState(false, "Unhandled StatsKey value: " + statsKey); } diff --git a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java index 59f079f..46e5f68 100644 --- a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java 
+++ b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java @@ -73,9 +73,12 @@ public class ColumnStats { public String toString() { return name_; } } - // in bytes: excludes serialization overhead + // in bytes: excludes serialization overhead. + // -1 if unknown. Always has a valid value for fixed-length types. private double avgSize_; // in bytes; includes serialization overhead. + // -1 if unknown. Always has a valid value for fixed-length types. + // avgSerializedSize_ is valid iff avgSize_ is valid. private double avgSerializedSize_; private long maxSize_; // in bytes private long numDistinctValues_; @@ -83,6 +86,7 @@ public class ColumnStats { public ColumnStats(Type colType) { initColStats(colType); + validate(colType); } /** @@ -94,6 +98,7 @@ public class ColumnStats { maxSize_ = other.maxSize_; numDistinctValues_ = other.numDistinctValues_; numNulls_ = other.numNulls_; + validate(null); } /** @@ -121,16 +126,20 @@ public class ColumnStats { public static ColumnStats fromExpr(Expr expr) { Preconditions.checkNotNull(expr); Preconditions.checkState(expr.getType().isValid()); - ColumnStats stats = new ColumnStats(expr.getType()); + Type colType = expr.getType(); + ColumnStats stats = new ColumnStats(colType); stats.setNumDistinctValues(expr.getNumDistinctValues()); SlotRef slotRef = expr.unwrapSlotRef(false); if (slotRef == null) return stats; ColumnStats slotStats = slotRef.getDesc().getStats(); if (slotStats == null) return stats; stats.numNulls_ = slotStats.getNumNulls(); - stats.avgSerializedSize_ = slotStats.getAvgSerializedSize(); - stats.avgSize_ = slotStats.getAvgSize(); - stats.maxSize_ = slotStats.getMaxSize(); + if (!colType.isFixedLengthType()) { + stats.avgSerializedSize_ = slotStats.getAvgSerializedSize(); + stats.avgSize_ = slotStats.getAvgSize(); + stats.maxSize_ = slotStats.getMaxSize(); + } + stats.validate(colType); return stats; } @@ -152,6 +161,7 @@ public class ColumnStats { } else { numNulls_ += other.numNulls_; } + 
validate(null); return this; } @@ -165,6 +175,7 @@ public class ColumnStats { public long getMaxSize() { return maxSize_; } public boolean hasNulls() { return numNulls_ > 0; } public long getNumNulls() { return numNulls_; } + // True iff getAvgSize() and getAvgSerializedSize() will return valid values. public boolean hasAvgSize() { return avgSize_ >= 0; } public boolean hasNumDistinctValues() { return numDistinctValues_ >= 0; } public boolean hasStats() { return numNulls_ != -1 || numDistinctValues_ != -1; } @@ -226,6 +237,14 @@ public class ColumnStats { } break; case CHAR: + // Ignore CHAR length stats, since it is fixed length internally. + isCompatible = statsData.isSetStringStats(); + if (isCompatible) { + StringColumnStatsData stringStats = statsData.getStringStats(); + numDistinctValues_ = stringStats.getNumDVs(); + numNulls_ = stringStats.getNumNulls(); + } + break; case VARCHAR: case STRING: isCompatible = statsData.isSetStringStats(); @@ -235,7 +254,11 @@ public class ColumnStats { numNulls_ = stringStats.getNumNulls(); maxSize_ = stringStats.getMaxColLen(); avgSize_ = Double.valueOf(stringStats.getAvgColLen()).floatValue(); - avgSerializedSize_ = avgSize_ + PrimitiveType.STRING.getSlotSize(); + if (avgSize_ >= 0) { + avgSerializedSize_ = avgSize_ + PrimitiveType.STRING.getSlotSize(); + } else { + avgSerializedSize_ = -1; + } } break; case BINARY: @@ -261,6 +284,7 @@ public class ColumnStats { "Unexpected column type: " + colType.toString()); break; } + validate(colType); return isCompatible; } @@ -340,13 +364,17 @@ public class ColumnStats { * Requires that the given value is of a type appropriate for the * member being set. Throws if that is not the case. 
*/ - public void update(StatsKey key, Number value) { + public void update(Type colType, StatsKey key, Number value) { Preconditions.checkNotNull(key); Preconditions.checkNotNull(value); if (key == StatsKey.AVG_SIZE) { Preconditions.checkArgument(value instanceof Float); + Float floatValue = (Float) value; + Preconditions.checkArgument(floatValue >= 0 || floatValue == -1, floatValue); } else { Preconditions.checkArgument(value instanceof Long); + Long longValue = (Long) value; + Preconditions.checkArgument(longValue >= 0 || longValue == -1, longValue); } switch (key) { case NUM_DISTINCT_VALUES: { @@ -358,15 +386,24 @@ public class ColumnStats { break; } case AVG_SIZE: { + Preconditions.checkArgument(!colType.isFixedLengthType(), colType); avgSize_ = (Float) value; + // Ensure avgSerializedSize_ stays in sync with avgSize_. + if (avgSize_ >= 0) { + avgSerializedSize_ = colType.getSlotSize() + avgSize_; + } else { + avgSerializedSize_ = -1; + } break; } case MAX_SIZE: { + Preconditions.checkArgument(!colType.isFixedLengthType(), colType); maxSize_ = (Long) value; break; } default: Preconditions.checkState(false); } + validate(colType); } /** @@ -380,14 +417,16 @@ public class ColumnStats { public void update(Type colType, TColumnStats stats) { initColStats(colType); - avgSize_ = Double.valueOf(stats.getAvg_size()).floatValue(); - if (colType.getPrimitiveType() == PrimitiveType.STRING || - colType.getPrimitiveType() == PrimitiveType.BINARY) { + if (!colType.isFixedLengthType() && stats.getAvg_size() >= 0) { + // Update size estimates based on average size. Fixed length types already include + // size estimates. 
+ avgSize_ = Double.valueOf(stats.getAvg_size()).floatValue(); avgSerializedSize_ = colType.getSlotSize() + avgSize_; } maxSize_ = stats.getMax_size(); numDistinctValues_ = stats.getNum_distinct_values(); numNulls_ = stats.getNum_nulls(); + validate(colType); } public TColumnStats toThrift() { @@ -399,9 +438,31 @@ public class ColumnStats { return colStats; } + /** + * Check that the stats obey expected invariants. + * 'colType' is optional, but should be passed in if it is available in the caller. + */ + public void validate(Type colType) { + // avgSize_ and avgSerializedSize_ must be set together. + Preconditions.checkState(avgSize_ >= 0 == avgSerializedSize_ >= 0, this); + + // Values must be either valid or -1. + Preconditions.checkState(avgSize_ == -1 || avgSize_ >= 0, this); + Preconditions.checkState(avgSerializedSize_ == -1 || avgSerializedSize_ >= 0, this); + Preconditions.checkState(maxSize_ == -1 || maxSize_ >= 0, this); + Preconditions.checkState(numDistinctValues_ == -1 || numDistinctValues_ >= 0, this); + Preconditions.checkState(numNulls_ == -1 || numNulls_ >= 0, this); + if (colType != null && colType.isFixedLengthType()) { + Preconditions.checkState(avgSize_ == colType.getSlotSize(), this); + Preconditions.checkState(avgSerializedSize_ == colType.getSlotSize(), this); + Preconditions.checkState(maxSize_ == colType.getSlotSize(), this); + } + } + @Override public String toString() { return Objects.toStringHelper(this.getClass()) + .add("avgSize_", avgSize_) .add("avgSerializedSize_", avgSerializedSize_) .add("maxSize_", maxSize_) .add("numDistinct_", numDistinctValues_) diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 76016c1..5cbcc37 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -54,7 +54,8 @@ public class PlannerTest extends PlannerTestBase { */ @Test public void 
testScanCardinality() { - runPlannerTestFile("card-scan"); + runPlannerTestFile("card-scan", + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } /** @@ -62,7 +63,8 @@ public class PlannerTest extends PlannerTestBase { */ @Test public void testInnerJoinCardinality() { - runPlannerTestFile("card-inner-join"); + runPlannerTestFile("card-inner-join", + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } /** @@ -70,7 +72,8 @@ public class PlannerTest extends PlannerTestBase { */ @Test public void testOuterJoinCardinality() { - runPlannerTestFile("card-outer-join"); + runPlannerTestFile("card-outer-join", + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } /** @@ -78,7 +81,8 @@ public class PlannerTest extends PlannerTestBase { */ @Test public void testMultiJoinCardinality() { - runPlannerTestFile("card-multi-join"); + runPlannerTestFile("card-multi-join", + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } @Test @@ -870,16 +874,16 @@ public class PlannerTest extends PlannerTestBase { // Single key string column with max length stat. HBaseColumn stringColwithSmallMaxSize = new HBaseColumn("", FeHBaseTable.Util.ROW_KEY_COLUMN_FAMILY, "", false, Type.STRING, "", 1); - stringColwithSmallMaxSize.getStats().update(ColumnStats.StatsKey.MAX_SIZE, - Long.valueOf(50)); + stringColwithSmallMaxSize.getStats().update( + Type.STRING, ColumnStats.StatsKey.MAX_SIZE, Long.valueOf(50)); assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(Lists .newArrayList(stringColwithSmallMaxSize)), 128); // Case that triggers the upper bound if some columns have stats are missing. 
HBaseColumn stringColwithLargeMaxSize = new HBaseColumn("", FeHBaseTable.Util.ROW_KEY_COLUMN_FAMILY, "", false, Type.STRING, "", 1); - stringColwithLargeMaxSize.getStats().update(ColumnStats.StatsKey.MAX_SIZE, - Long.valueOf(128 * 1024 * 1024)); + stringColwithLargeMaxSize.getStats().update( + Type.STRING, ColumnStats.StatsKey.MAX_SIZE, Long.valueOf(128 * 1024 * 1024)); assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(Lists.newArrayList( stringColwithLargeMaxSize, stringColWithoutStats)), 128 * 1024 * 1024); diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh index 2d6733a..d7e8cf8 100755 --- a/testdata/bin/compute-table-stats.sh +++ b/testdata/bin/compute-table-stats.sh @@ -34,7 +34,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad ${COMPUTE_STATS_SCRIPT} --db_names=functional\ --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls, alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable,nullrows, - date_tbl" + date_tbl,chars_medium" # We cannot load HBase on s3 and isilon yet. 
if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test index fb21e84..0987b27 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test @@ -8,7 +8,7 @@ select * from tpch.customer PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB row-size=218B cardinality=150.00K ==== # Predicate on a single value: card = |T|/ndv @@ -20,7 +20,7 @@ where c.c_custkey = 10 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey = 10 row-size=218B cardinality=1 ==== @@ -33,7 +33,7 @@ where c.c_nationkey = 10 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_nationkey = 10 row-size=218B cardinality=6.00K ==== @@ -45,7 +45,7 @@ where c.c_custkey = 10 OR c.c_custkey = 20 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey IN (10, 20) row-size=218B cardinality=2 ==== @@ -59,7 +59,7 @@ where c.c_custkey = 10 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey = 10 OR c.c_nationkey = 10 row-size=218B cardinality=6.00K ==== @@ -77,7 +77,7 @@ where t.id = 10 PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypestiny t] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B predicates: t.id = 10 OR t.bool_col = TRUE row-size=89B cardinality=5 ==== @@ -89,7 +89,7 @@ where c.c_custkey in (10, 20, 30) PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 
files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey IN (10, 20, 30) row-size=218B cardinality=3 ==== @@ -102,7 +102,7 @@ where c.c_custkey in (10, 20, 30, 30, 10, 20) PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20) row-size=218B cardinality=6 ==== @@ -114,7 +114,7 @@ where c.c_custkey = 10 OR c.c_custkey = 10 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey = 10 row-size=218B cardinality=1 ==== @@ -128,7 +128,7 @@ where c.c_custkey = 10 OR 10 = c.c_custkey PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey IN (10, 10) row-size=218B cardinality=2 ==== @@ -149,7 +149,7 @@ where c.c_custkey = 10 AND c.c_custkey = 10 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey = 10 row-size=218B cardinality=1 ==== @@ -165,7 +165,7 @@ where id != 10 PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypestiny] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B predicates: id != 10 row-size=89B cardinality=1 ==== @@ -179,7 +179,7 @@ where c.c_custkey < 1234 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey < 1234 row-size=218B cardinality=15.00K ==== @@ -194,7 +194,7 @@ where c.c_custkey < 1234 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey < 1234 row-size=218B cardinality=15.00K ==== @@ -209,7 +209,7 @@ where c.c_custkey < 1234 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - 
partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey < 1234 row-size=218B cardinality=15.00K ==== @@ -224,7 +224,7 @@ where c.c_custkey < 1234 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey < 1234, c.c_custkey < 2345 row-size=218B cardinality=15.00K ==== @@ -240,7 +240,7 @@ where c.c_custkey < 1234 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey < 1234, c.c_nationkey < 100 row-size=218B cardinality=15.00K ==== @@ -257,7 +257,7 @@ where c.c_custkey between 1234 and 2345 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 row-size=218B cardinality=15.00K ==== @@ -278,7 +278,7 @@ where c.c_custkey >= 1234 and c.c_custkey <= 2345 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 row-size=218B cardinality=15.00K ==== @@ -292,7 +292,7 @@ where c.c_custkey between 1234 and 2345 PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 row-size=218B cardinality=15.00K ==== @@ -309,7 +309,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypesagg a] partition predicates: a.`day` >= 6 - partitions=5/11 files=5 size=372.38KB + HDFS partitions=5/11 files=5 size=372.38KB row-size=95B cardinality=5.00K ==== # Partitioned table, one partition matches @@ -321,7 +321,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypesagg a] partition predicates: a.`day` = 6 - partitions=1/11 files=1 size=74.48KB + HDFS partitions=1/11 files=1 size=74.48KB 
row-size=95B cardinality=1.00K ==== # Partitioned table, no partitions match @@ -350,7 +350,7 @@ where c.c_mktsegment is null PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_mktsegment IS NULL row-size=218B cardinality=15.00K ==== @@ -364,7 +364,7 @@ where c.c_mktsegment is not null PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_mktsegment IS NOT NULL row-size=218B cardinality=15.00K ==== @@ -376,7 +376,7 @@ where tinyint_col is null PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypesagg] - partitions=11/11 files=11 size=814.73KB + HDFS partitions=11/11 files=11 size=814.73KB predicates: tinyint_col IS NULL row-size=95B cardinality=2.00K ==== @@ -388,7 +388,7 @@ where tinyint_col is not null PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypesagg] - partitions=11/11 files=11 size=814.73KB + HDFS partitions=11/11 files=11 size=814.73KB predicates: tinyint_col IS NOT NULL row-size=95B cardinality=9.00K ==== @@ -400,7 +400,7 @@ where concat(c.c_mktsegment, c_comment) is null PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: concat(c.c_mktsegment, c_comment) IS NULL row-size=218B cardinality=15.00K ==== @@ -414,7 +414,7 @@ where concat(c.c_mktsegment, c_comment) is not null PLAN-ROOT SINK | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL row-size=218B cardinality=15.00K ==== @@ -425,7 +425,7 @@ from functional.alltypesnopart PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypesnopart] - partitions=1/1 files=0 size=0B + HDFS partitions=1/1 files=0 size=0B row-size=72B cardinality=0 ==== # Filter on the no-stats table @@ -436,7 +436,17 @@ where int_col = 10 PLAN-ROOT SINK | 
00:SCAN HDFS [functional.alltypesnopart] - partitions=1/1 files=0 size=0B + HDFS partitions=1/1 files=0 size=0B predicates: int_col = 10 row-size=72B cardinality=0 ==== +# VARCHAR column with stats +# Regression test for IMPALA-8849 - previously produced a negative row size. +select varchar_col from functional.chars_medium +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [functional.chars_medium] + HDFS partitions=1/1 files=1 size=320.68KB + row-size=15B cardinality=11.00K +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test index 964b7f9..cfb9843 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test @@ -100,7 +100,7 @@ PLAN-ROOT SINK |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypessmall f] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> f.id row-size=4B cardinality=100 ==== @@ -118,11 +118,11 @@ PLAN-ROOT SINK | row-size=89B cardinality=108 | |--02:SCAN HDFS [functional.alltypestiny] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 01:SCAN HDFS [functional.alltypessmall] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB row-size=89B cardinality=100 ==== # Constant conjunct turns union into an empty-set node. 
@@ -145,7 +145,7 @@ PLAN-ROOT SINK |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypes a] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=89B cardinality=7.30K ==== # Constant conjunct in the ON-clause of an outer join is @@ -163,11 +163,11 @@ PLAN-ROOT SINK | row-size=178B cardinality=100 | |--01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB row-size=89B cardinality=100 ==== # Constant conjunct in the ON-clause of an outer join is @@ -186,11 +186,11 @@ PLAN-ROOT SINK | row-size=178B cardinality=9 | |--01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> a.id row-size=89B cardinality=100 ==== @@ -209,11 +209,11 @@ PLAN-ROOT SINK | row-size=178B cardinality=108 | |--01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB row-size=89B cardinality=100 ==== # Limit 0 turns query block into an empty-set node. 
@@ -257,7 +257,7 @@ PLAN-ROOT SINK |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypessmall f] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> f.id row-size=4B cardinality=100 ==== @@ -275,11 +275,11 @@ PLAN-ROOT SINK | row-size=89B cardinality=108 | |--02:SCAN HDFS [functional.alltypestiny] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 01:SCAN HDFS [functional.alltypessmall] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB row-size=89B cardinality=100 ==== # Limit 0 causes empty-set union. @@ -308,7 +308,7 @@ PLAN-ROOT SINK | row-size=8B cardinality=11.00K | |--03:SCAN HDFS [functional.alltypesagg] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | row-size=4B cardinality=11.00K | 02:AGGREGATE [FINALIZE] @@ -428,14 +428,14 @@ PLAN-ROOT SINK | 02:UNION | pass-through-operands: all -| row-size=2B cardinality=0 +| row-size=4B cardinality=0 | 01:AGGREGATE [FINALIZE] | group by: lead(-496, 81, NULL) OVER(...) -| row-size=2B cardinality=0 +| row-size=4B cardinality=0 | 00:UNION - row-size=2B cardinality=0 + row-size=4B cardinality=0 ==== # IMPALA-2088: Test empty union operands with analytic functions. select lead(-496, 81) over (order by t1.double_col desc, t1.id asc) @@ -456,18 +456,18 @@ PLAN-ROOT SINK 02:UNION | constant-operands=1 | pass-through-operands: 01 -| row-size=2B cardinality=9 +| row-size=16B cardinality=9 | |--03:SCAN HDFS [functional.alltypestiny] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=8B cardinality=8 | 01:AGGREGATE [FINALIZE] | group by: lead(-496, 81, NULL) OVER(...) -| row-size=2B cardinality=0 +| row-size=16B cardinality=0 | 00:UNION - row-size=2B cardinality=0 + row-size=16B cardinality=0 ==== # IMPALA-2216: Make sure the final output exprs are substituted, even # if the resulting plan is an EmptySetNode. 
@@ -556,7 +556,7 @@ PLAN-ROOT SINK | 03:EMPTYSET | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=288.98MB predicates: c_custkey < 10 row-size=56B cardinality=15.00K ==== @@ -598,7 +598,7 @@ PLAN-ROOT SINK | row-size=8B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=288.98MB predicates: c_custkey = 1 row-size=44B cardinality=1 ==== @@ -632,6 +632,6 @@ PLAN-ROOT SINK |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypes x] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=0B cardinality=7.30K ==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test index e76170a..ce80222 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test @@ -549,7 +549,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE 'id','INT',2915,0,4,4 'ch1','CHAR(1)',1,0,1,1 'ch2','CHAR(8)',10,0,8,8 -'ch3','CHAR(20)',10,0,8,8 +'ch3','CHAR(20)',10,0,20,20 'ts','TIMESTAMP',2871,0,16,16 'vc1','VARCHAR(1)',1,0,1,1 'vc2','VARCHAR(8)',10,0,8,8 @@ -578,8 +578,8 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS 'id','INT',2915,1,4,4 'ch1','CHAR(1)',2,0,1,1 -'ch2','CHAR(8)',11,0,8,7.99766731262207 -'ch3','CHAR(20)',11,0,8,7.99766731262207 +'ch2','CHAR(8)',11,0,8,8 +'ch3','CHAR(20)',11,0,20,20 'ts','TIMESTAMP',2871,1,16,16 'vc1','VARCHAR(1)',2,0,1,1 'vc2','VARCHAR(8)',11,0,8,7.99766731262207 diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test index 01e5aa8..38d1024 100644 --- 
a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test @@ -794,7 +794,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE 'id','INT',2915,0,4,4 'ch1','CHAR(1)',1,0,1,1 'ch2','CHAR(8)',10,0,8,8 -'ch3','CHAR(20)',10,0,8,8 +'ch3','CHAR(20)',10,0,20,20 'ts','TIMESTAMP',2871,0,16,16 'vc1','VARCHAR(1)',1,0,1,1 'vc2','VARCHAR(8)',10,0,8,8