This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.0.1 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8862719d87ac5dc214985025463f002d41b15672 Author: liuyao <[email protected]> AuthorDate: Mon Apr 26 14:37:43 2021 +0800 IMPALA-7560: Set selectivity of Not-equal Calculate binary predicate selectivity if one of the children is a slotref and the other children are all constant. eg. something like "col != 5", but not "2 * col != 10" selectivity = 1 - 1/ndv Testing: Modify the function testNeSelectivity() of the ExprCardinalityTest.java, change -1 to the correct value. Change-Id: Icd6f5945840ea2a8194d72aa440ddfa6915cbb3a Reviewed-on: http://gerrit.cloudera.org:8080/17344 Reviewed-by: Qifan Chen <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Zoltan Borok-Nagy <[email protected]> --- .../apache/impala/analysis/BinaryPredicate.java | 41 +++++++++++-- .../impala/analysis/ExprCardinalityTest.java | 71 +++++++++------------- .../org/apache/impala/planner/CardinalityTest.java | 4 +- .../queries/PlannerTest/analytic-fns.test | 4 +- .../queries/PlannerTest/card-scan.test | 4 +- .../queries/PlannerTest/hbase.test | 8 +-- .../queries/PlannerTest/inline-view-limit.test | 4 +- .../queries/PlannerTest/kudu.test | 4 +- .../queries/PlannerTest/predicate-propagation.test | 8 +-- .../queries/PlannerTest/tpch-all.test | 50 +++++++-------- .../queries/PlannerTest/tpch-kudu.test | 2 +- .../queries/PlannerTest/tpch-nested.test | 8 +-- .../queries/PlannerTest/tpch-views.test | 2 +- 13 files changed, 110 insertions(+), 100 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java index 9092058..83ab86c 100644 --- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import org.apache.impala.catalog.Db; +import org.apache.impala.catalog.FeTable; import org.apache.impala.catalog.Function.CompareMode; import org.apache.impala.catalog.ScalarFunction; import org.apache.impala.catalog.Type; @@ -237,14 +238,42 @@ public class BinaryPredicate extends Predicate { // TODO: Compute selectivity for nested predicates. // TODO: Improve estimation using histograms. Reference<SlotRef> slotRefRef = new Reference<SlotRef>(); - if ((op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT) - && isSingleColumnPredicate(slotRefRef, null)) { - long distinctValues = slotRefRef.getRef().getNumDistinctValues(); - if (distinctValues > 0) { - selectivity_ = 1.0 / distinctValues; - selectivity_ = Math.max(0, Math.min(1, selectivity_)); + if (!isSingleColumnPredicate(slotRefRef, null)) { + return; + } + long distinctValues = slotRefRef.getRef().getNumDistinctValues(); + if (distinctValues < 0) { + // Lack of statistics to estimate the selectivity. + return; + } else if (distinctValues == 0 && (op_ == Operator.EQ || op_ == Operator.NE)) { + // If the table is empty, then distinctValues is 0. This case is equivalent + // to selectivity is 0. + selectivity_ = 0.0; + return; + } + + if (op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT) { + selectivity_ = 1.0 / distinctValues; + } else if (op_ == Operator.NE || op_ == Operator.DISTINCT_FROM) { + // For case <column> IS DISTINCT FROM NULL, all non-null values are true + if (Expr.IS_NULL_LITERAL.apply(getChild(1)) && op_ == Operator.DISTINCT_FROM) { + selectivity_ = 1.0; + } else { + selectivity_ = 1.0 - 1.0 / distinctValues; } + SlotDescriptor slotDesc = slotRefRef.getRef().getDesc(); + if (slotDesc.getStats().hasNullsStats()) { + FeTable table = slotDesc.getParent().getTable(); + if (table != null && table.getNumRows() > 0) { + long numRows = table.getNumRows(); + selectivity_ *= + (double) (numRows - slotDesc.getStats().getNumNulls()) / numRows; + } + } + } else { + return; } + selectivity_ = Math.max(0, Math.min(1, selectivity_)); } @Override diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java index 061e4a9..efd0378 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java @@ -314,23 +314,16 @@ public class ExprCardinalityTest { */ @Test public void testNeSelectivity() throws ImpalaException { - // Bug: No estimated selectivity for != (IMPALA-8039) - //verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300); - verifySelectExpr("alltypes", "id != 10", 3, -1); - //verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2); - verifySelectExpr("alltypes", "bool_col != true", 3, -1); - //verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10); - verifySelectExpr("alltypes", "int_col != 10", 3, -1); - - //verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26); - verifySelectExpr("nullrows", "id != 'foo'", 3, -1); - // Bug: All nulls, so NDV should = 1, so Sel should be 1 - 1.0/1 - //verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1); - verifySelectExpr("nullrows", "null_str != 'foo'", 3, -1); - //verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6); - verifySelectExpr("nullrows", "group_str != 'foo'", 3, -1); - //verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, 1 - 1.0/7); - verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, -1); + verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300); + verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2); + verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10); + verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26); + verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1); + verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6); + verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, (1 - 1.0/6)*6/26); + // field has no statistics. + verifySelectExpr("emptytable", "field != 'foo'", 3, -1); + verifySelectExpr("emptytable", "f2 != 10", 3, 0.0); // Bug: Sel should default to 1 - good old 0.1 verifySelectExpr("manynulls", "id != 10", 3, -1); @@ -343,36 +336,28 @@ public class ExprCardinalityTest { */ @Test public void testDistinctSelectivity() throws ImpalaException { - // BUG: IS DISTINCT has no selectivity - //verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300); - verifySelectExpr("alltypes", "id is distinct from 10", 3, -1); + + verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300); // Bug: does not treat NULL specially // Bug: NDV sould be 2 since IS DISTINCT won't return NULL //verifySelectExpr("alltypes", "id is distinct from null", 2, 1); - verifySelectExpr("alltypes", "id is distinct from null", 3, -1); - //verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2); - verifySelectExpr("alltypes", "bool_col is distinct from true", 3, -1); - //verifySelectExpr("alltypes", "bool_col is distinct from null", 2, 1); - verifySelectExpr("alltypes", "bool_col is distinct from null", 3, -1); - //verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10); - verifySelectExpr("alltypes", "int_col is distinct from 10", 3, -1); - //verifySelectExpr("alltypes", "int_col is distinct from null", 2, 1); - verifySelectExpr("alltypes", "int_col is distinct from null", 3, -1); - - //verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26); - verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, -1); - //verifySelectExpr("nullrows", "id is distinct from null", 2, 1); - verifySelectExpr("nullrows", "id is distinct from null", 3, -1); - // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1 - //verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 2, 1 - 1.0/1); - verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, -1); - verifySelectExpr("nullrows", "null_str is distinct from null", 3, -1); - //verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6); - verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, -1); - //verifySelectExpr("nullrows", "group_str is distinct from null", 2, 0); - verifySelectExpr("nullrows", "group_str is distinct from null", 3, -1); - //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1 - 1.0/7); + verifySelectExpr("alltypes", "id is distinct from null", 3, 1); + verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2); + + verifySelectExpr("alltypes", "bool_col is distinct from null", 3, 1); + verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10); + + verifySelectExpr("alltypes", "int_col is distinct from null", 3, 1); + verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26); + + verifySelectExpr("nullrows", "id is distinct from null", 3, 1); + verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, 1 - 1.0/1); + verifySelectExpr("nullrows", "null_str is distinct from null", 3, 0); + verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6); + verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1); + verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1); verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/6); + verifySelectExpr("nullrows", "some_nulls is distinct from null", 3, 6.0/26.0); // Bug: Sel should default to 1 - good old 0.1 verifySelectExpr("manynulls", "id is distinct from 10", 3, -1); diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java index f634b28..91fc296 100644 --- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java +++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java @@ -67,10 +67,8 @@ public class CardinalityTest extends PlannerTestBase { verifyCardinality( "SELECT id FROM functional.alltypes WHERE int_col = 1", 7300/10); - // Assume classic 0.1 selectivity for other operators - // IMPALA-7560 says this should be revised. verifyCardinality( - "SELECT id FROM functional.alltypes WHERE int_col != 1", 730); + "SELECT id FROM functional.alltypes WHERE int_col != 1", 6570); // IMPALA-7601 says the following should be revised. verifyCardinality( diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test index 593ac72..a01b401 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test @@ -2086,7 +2086,7 @@ PLAN-ROOT SINK | 05:SELECT | predicates: id IS NULL, tinyint_col != 5 -| row-size=9B cardinality=730 +| row-size=9B cardinality=699 | 00:UNION | row-size=9B cardinality=7.30K @@ -2124,7 +2124,7 @@ PLAN-ROOT SINK | 05:SELECT | predicates: id IS NULL, tinyint_col != 5 -| row-size=9B cardinality=730 +| row-size=9B cardinality=699 | 00:UNION | row-size=9B cardinality=7.30K diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test index cf4c316..20aee39 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test @@ -156,8 +156,6 @@ PLAN-ROOT SINK # Not-equal, card = 1 - 1/ndv # Use smaller table so effect is clear # |T|=8, NDV=8 -# Bug: IMPALA-8039 -# Bug, expected cardinality ~7 select * from functional.alltypestiny where id != 10 @@ -167,7 +165,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.alltypestiny] HDFS partitions=4/4 files=4 size=460B predicates: id != 10 - row-size=89B cardinality=1 + row-size=89B cardinality=7 ==== # Inequality. No useful stats. # Bug: IMPALA-8037, Assumes sel = 0.1 diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test index 5a26b0f..f829753 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test @@ -411,10 +411,10 @@ PLAN-ROOT SINK | 00:SCAN HBASE [functional_hbase.alltypessmall] hbase filters: - d:string_col NOT_EQUAL '2' d:string_col GREATER_OR_EQUAL '4' d:date_string_col EQUAL '04/03/09' - predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09' + d:string_col NOT_EQUAL '2' + predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2' row-size=89B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -423,10 +423,10 @@ PLAN-ROOT SINK | 00:SCAN HBASE [functional_hbase.alltypessmall] hbase filters: - d:string_col NOT_EQUAL '2' d:string_col GREATER_OR_EQUAL '4' d:date_string_col EQUAL '04/03/09' - predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09' + d:string_col NOT_EQUAL '2' + predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2' row-size=89B cardinality=1 ==== # mix of predicates and functional_hbase. filters diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test index 266d528..514bf0d 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test @@ -708,7 +708,7 @@ PLAN-ROOT SINK | 02:SCAN HDFS [functional.alltypessmall] partitions=4/4 files=4 size=6.32KB - predicates: functional.alltypessmall.id != 1, functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, id != 2 + predicates: functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, functional.alltypessmall.id != 1, id != 2 runtime filters: RF000 -> id row-size=4B cardinality=10 ==== @@ -772,7 +772,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB - predicates: functional.alltypes.id != 2, functional.alltypes.id > 10, functional.alltypes.id > 20, id != 1 + predicates: functional.alltypes.id > 10, functional.alltypes.id > 20, functional.alltypes.id != 2, id != 1 runtime filters: RF000 -> id row-size=4B cardinality=730 ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test index 824188a..6bdcc41 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test @@ -143,7 +143,7 @@ and zip > 1 and zip < 50 PLAN-ROOT SINK | 00:SCAN KUDU [functional_kudu.testtbl] - kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo' + kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0 row-size=28B cardinality=0 ---- SCANRANGELOCATIONS NODE 0: @@ -154,7 +154,7 @@ PLAN-ROOT SINK 01:EXCHANGE [UNPARTITIONED] | 00:SCAN KUDU [functional_kudu.testtbl] - kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo' + kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0 row-size=28B cardinality=0 ==== # Constant propagation works for Kudu diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test index b977b98..74ff2b4 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test @@ -585,7 +585,7 @@ PLAN-ROOT SINK | 01:SCAN HDFS [functional.alltypessmall] | partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2 | HDFS partitions=2/4 files=2 size=3.16KB -| predicates: functional.alltypessmall.int_col != 5, id > 11 +| predicates: id > 11, functional.alltypessmall.int_col != 5 | row-size=16B cardinality=5 | 00:SCAN HDFS [functional.alltypes a] @@ -629,7 +629,7 @@ PLAN-ROOT SINK | 01:SCAN HDFS [functional.alltypessmall] | partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2 | HDFS partitions=2/4 files=2 size=3.16KB -| predicates: functional.alltypessmall.int_col != 5, id > 11 +| predicates: id > 11, functional.alltypessmall.int_col != 5 | row-size=16B cardinality=5 | 00:SCAN HDFS [functional.alltypes a] @@ -672,7 +672,7 @@ PLAN-ROOT SINK | 01:SCAN HDFS [functional.alltypessmall] | partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2 | HDFS partitions=2/4 files=2 size=3.16KB -| predicates: functional.alltypessmall.int_col != 5, id > 11 +| predicates: id > 11, functional.alltypessmall.int_col != 5 | row-size=16B cardinality=5 | 00:SCAN HDFS [functional.alltypes a] @@ -709,7 +709,7 @@ PLAN-ROOT SINK | 01:SCAN HDFS [functional.alltypessmall] | partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2 | HDFS partitions=2/4 files=2 size=3.16KB -| predicates: functional.alltypessmall.int_col != 5, id > 11 +| predicates: id > 11, functional.alltypessmall.int_col != 5 | row-size=16B cardinality=5 | 00:SCAN HDFS [functional.alltypes a] diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test index 917e5c1..e54f6a3 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test @@ -3453,20 +3453,20 @@ PLAN-ROOT SINK | 07:SORT | order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 06:AGGREGATE [FINALIZE] | output: count(ps_suppkey) | group by: p_brand, p_type, p_size -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 05:AGGREGATE | group by: p_brand, p_type, p_size, ps_suppkey -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN] | hash predicates: ps_suppkey = s_suppkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--02:SCAN HDFS [tpch.supplier] | HDFS partitions=1/1 files=1 size=1.33MB @@ -3476,12 +3476,12 @@ PLAN-ROOT SINK 03:HASH JOIN [INNER JOIN] | hash predicates: ps_partkey = p_partkey | runtime filters: RF000 <- p_partkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--01:SCAN HDFS [tpch.part] | HDFS partitions=1/1 files=1 size=22.83MB -| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' -| row-size=65B cardinality=8.00K +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' +| row-size=65B cardinality=7.89K | 00:SCAN HDFS [tpch.partsupp] HDFS partitions=1/1 files=1 size=112.71MB @@ -3497,33 +3497,33 @@ PLAN-ROOT SINK | 07:SORT | order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 13:AGGREGATE [FINALIZE] | output: count:merge(ps_suppkey) | group by: p_brand, p_type, p_size -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 12:EXCHANGE [HASH(p_brand,p_type,p_size)] | 06:AGGREGATE [STREAMING] | output: count(ps_suppkey) | group by: p_brand, p_type, p_size -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 11:AGGREGATE | group by: p_brand, p_type, p_size, ps_suppkey -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)] | 05:AGGREGATE [STREAMING] | group by: p_brand, p_type, p_size, ps_suppkey -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST] | hash predicates: ps_suppkey = s_suppkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--09:EXCHANGE [BROADCAST] | | @@ -3535,14 +3535,14 @@ PLAN-ROOT SINK 03:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: ps_partkey = p_partkey | runtime filters: RF000 <- p_partkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--08:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.part] | HDFS partitions=1/1 files=1 size=22.83MB -| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' -| row-size=65B cardinality=8.00K +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' +| row-size=65B cardinality=7.89K | 00:SCAN HDFS [tpch.partsupp] HDFS partitions=1/1 files=1 size=112.71MB @@ -3558,33 +3558,33 @@ PLAN-ROOT SINK | 07:SORT | order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 13:AGGREGATE [FINALIZE] | output: count:merge(ps_suppkey) | group by: p_brand, p_type, p_size -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 12:EXCHANGE [HASH(p_brand,p_type,p_size)] | 06:AGGREGATE [STREAMING] | output: count(ps_suppkey) | group by: p_brand, p_type, p_size -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 11:AGGREGATE | group by: p_brand, p_type, p_size, ps_suppkey -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)] | 05:AGGREGATE [STREAMING] | group by: p_brand, p_type, p_size, ps_suppkey -| row-size=65B cardinality=31.92K +| row-size=65B cardinality=31.49K | 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST] | hash predicates: ps_suppkey = s_suppkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--JOIN BUILD | | join-table-id=00 plan-id=01 cohort-id=01 @@ -3599,7 +3599,7 @@ PLAN-ROOT SINK | 03:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: ps_partkey = p_partkey -| row-size=81B cardinality=31.92K +| row-size=81B cardinality=31.49K | |--JOIN BUILD | | join-table-id=01 plan-id=02 cohort-id=01 @@ -3610,8 +3610,8 @@ PLAN-ROOT SINK | | | 01:SCAN HDFS [tpch.part] | HDFS partitions=1/1 files=1 size=22.83MB -| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' -| row-size=65B cardinality=8.00K +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' +| row-size=65B cardinality=7.89K | 00:SCAN HDFS [tpch.partsupp] HDFS partitions=1/1 files=1 size=112.71MB diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test index b5f85ab..b851b4a 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test @@ -1228,7 +1228,7 @@ PLAN-ROOT SINK | row-size=89B cardinality=31.92K | |--01:SCAN KUDU [tpch_kudu.part] -| predicates: p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' +| predicates: NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' | kudu predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9) | row-size=73B cardinality=8.00K | diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test index 94759c8..c5a158d 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test @@ -2307,8 +2307,8 @@ PLAN-ROOT SINK | |--05:SCAN HDFS [tpch_nested_parquet.part p] | HDFS partitions=1/1 files=1 size=6.24MB -| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' -| row-size=65B cardinality=8.00K +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' +| row-size=65B cardinality=7.89K | 01:SUBPLAN | row-size=103B cardinality=10.00K @@ -2368,8 +2368,8 @@ PLAN-ROOT SINK | | | 05:SCAN HDFS [tpch_nested_parquet.part p] | HDFS partitions=1/1 files=1 size=6.24MB -| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' -| row-size=65B cardinality=8.00K +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45' +| row-size=65B cardinality=7.89K | 01:SUBPLAN | row-size=103B cardinality=10.00K diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test index adc56aa..ff599c0 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test @@ -1222,7 +1222,7 @@ PLAN-ROOT SINK | |--01:SCAN HDFS [tpch.part] | HDFS partitions=1/1 files=1 size=22.83MB -| predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), tpch.part.p_brand != 'Brand#45', NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%' +| predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%', tpch.part.p_brand != 'Brand#45' | row-size=65B cardinality=8.00K | 00:SCAN HDFS [tpch.partsupp]
