This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push: new 60aa74f [CALCITE-4237] Following [CALCITE-4173], fix assertion error when RexSimplify generates Sarg with single null only 60aa74f is described below commit 60aa74f4e5b3b01ad90cb9ee9a4520483d45b2b4 Author: Vladimir Sitnikov <sitnikov.vladi...@gmail.com> AuthorDate: Sun Sep 6 19:19:02 2020 +0300 [CALCITE-4237] Following [CALCITE-4173], fix assertion error when RexSimplify generates Sarg with single null only Fix TpcdsTest expected output. Add a test for IS NULL Sarg (Julian Hyde). Close apache/calcite#2144 Close apache/calcite#2141 --- .../java/org/apache/calcite/rex/RexSimplify.java | 16 ++++-- .../org/apache/calcite/rex/RexProgramTest.java | 15 ++++++ .../apache/calcite/adapter/tpcds/TpcdsTest.java | 58 +++++++++++----------- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java index 464825e..63b3a32 100644 --- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java +++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java @@ -2543,8 +2543,9 @@ public class RexSimplify { if (negate) { return false; } - return accept1(((RexCall) e).operands.get(0), e.getKind(), - rexBuilder.makeNullLiteral(e.getType()), newTerms); + final RexNode arg = ((RexCall) e).operands.get(0); + return accept1(arg, e.getKind(), + rexBuilder.makeNullLiteral(arg.getType()), newTerms); default: return false; } @@ -2612,7 +2613,7 @@ public class RexSimplify { return true; case IS_NULL: if (negate) { - throw new AssertionError(); + throw new AssertionError("negate is not supported for IS_NULL"); } b.containsNull = true; return true; @@ -2690,7 +2691,14 @@ public class RexSimplify { } @Override public RelDataType getType() { - return rexBuilder.typeFactory.leastRestrictive(Util.distinctList(types)); + if (this.types.isEmpty()) { + // Expression is "x IS NULL" + return ref.getType(); + } + final List<RelDataType> distinctTypes = Util.distinctList(this.types); + return Objects.requireNonNull( + rexBuilder.typeFactory.leastRestrictive(distinctTypes), + () -> "Can't find leastRestrictive type among " + distinctTypes); } @Override public <R> R accept(RexVisitor<R> visitor) { diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java index 1b836f2..b888834 100644 --- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java +++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java @@ -1640,6 +1640,21 @@ class RexProgramTest extends RexProgramTestBase { .expandedSearch(expanded); } + @Test void testSimplifyRange6() { + // An IS NULL condition would not usually become a Sarg, + // but here it is combined with another condition, and together they cross + // the complexity threshold. + final RexNode aRef = input(tInt(true), 0); + final RexNode bRef = input(tInt(true), 1); + // a in (1, 2) or b is null + RexNode expr = or(eq(aRef, literal(1)), eq(aRef, literal(2)), isNull(bRef)); + final String simplified = + "OR(SEARCH($1, Sarg[, null]), SEARCH($0, Sarg[1, 2]))"; + final String expanded = "OR(IS NULL($1), OR(=($0, 1), =($0, 2)))"; + checkSimplify(expr, simplified) + .expandedSearch(expanded); + } + @Test void testSimplifyItemRangeTerms() { RexNode item = item(input(tArray(tInt()), 3), literal(1)); // paranoid validation doesn't support array types, disable it for a moment diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java index dec8206..02295a3 100644 --- a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java +++ b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java @@ -21,7 +21,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.runtime.Hook; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.test.CalciteAssert; import org.apache.calcite.tools.Frameworks; @@ -220,27 +219,27 @@ class TpcdsTest { .withHook(Hook.PROGRAM, handler(true, 2)) .explainMatches("including all attributes ", CalciteAssert.checkMaskedResultContains("" - + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)], expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12, $t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)], expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17, $t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)], expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL], expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...] - + " EnumerableLimit(fetch=[100]): rowcount = 100.0, cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0 io}\n" - + " EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 5.434029018852197E26, cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0 io}\n" - + " EnumerableAggregate(group=[{0, 1, 2}], STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)], agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]): rowcount = 5.434029018852197E26, cumulative cost = {1.1892372507898816E28 rows, 1.2172225002228922E30 cpu, 0.0 io}\n" - + " EnumerableCalc(expr#0..211=[{inputs}], expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196, $t196)], I_ITEM_ID=[$t58], I_ITEM_DESC=[$t61], S_STATE=[$t24], SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196], $f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 5.434029018852197E27, cumulative cost = {1.0873492066864028E28 rows, 1.2172225002228922E30 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[AND(=($82, $133), =($81, $132), =($88, $139))], joinType=[inner]): rowcount = 5.434029018852197E27, cumulative cost = {5.439463048011832E27 rows, 1.8506796E7 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[=($0, $86)], joinType=[inner]): rowcount = 2.3008402586892598E13, cumulative cost = {4.8588854672854766E13 rows, 7281360.0 cpu, 0.0 io}\n" + + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)], expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12, $t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)], expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17, $t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)], expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL], expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...] + + " EnumerableLimit(fetch=[100]): rowcount = 100.0, cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0 io}\n" + + " EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 9.056715031420328E26, cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0 io}\n" + + " EnumerableAggregate(group=[{0, 1, 2}], STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)], agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]): rowcount = 9.056715031420328E26, cumulative cost = {1.982062084640469E28 rows, 2.0287041670381534E30 cpu, 0.0 io}\n" + + " EnumerableCalc(expr#0..211=[{inputs}], expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196, $t196)], I_ITEM_ID=[$t30], I_ITEM_DESC=[$t33], S_STATE=[$t24], SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196], $f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 9.056715031420328E27, cumulative cost = {1.8122486778013382E28 rows, 2.0287041670381534E30 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[AND(=($82, $133), =($81, $132), =($88, $139))], joinType=[inner]): rowcount = 9.056715031420328E27, cumulative cost = {9.065771746593055E27 rows, 1.7776306E7 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[=($0, $86)], joinType=[inner]): rowcount = 3.8347337644821E13, cumulative cost = {8.097127760662506E13 rows, 7281360.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE]]): rowcount = 12.0, cumulative cost = {12.0 rows, 13.0 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[=($0, $50)], joinType=[inner]): rowcount = 1.2782445881607E13, cumulative cost = {1.279800620431234E13 rows, 7281347.0 cpu, 0.0 io}\n" - + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=['1998Q1'], expr#29=[=($t15, $t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 10957.35, cumulative cost = {84006.35 rows, 4382941.0 cpu, 0.0 io}\n" - + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[=($0, $24)], joinType=[inner]): rowcount = 7.7770908E9, cumulative cost = {7.783045975286664E9 rows, 2898406.0 cpu, 0.0 io}\n" - + " EnumerableTableScan(table=[[TPCDS, ITEM]]): rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[=($0, $52)], joinType=[inner]): rowcount = 2.1304076469345E13, cumulative cost = {2.1319863492416234E13 rows, 7281347.0 cpu, 0.0 io}\n" + + " EnumerableTableScan(table=[[TPCDS, ITEM]]): rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[=($0, $28)], joinType=[inner]): rowcount = 7.890398692349999E9, cumulative cost = {7.896430011596686E9 rows, 7263346.0 cpu, 0.0 io}\n" + + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=[Sarg['1998Q1']:CHAR(6)], expr#29=[SEARCH($t15, $t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n" + + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE_SALES]]): rowcount = 2880404.0, cumulative cost = {2880404.0 rows, 2880405.0 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[AND(=($31, $79), =($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative cost = {7.0048032234040472E16 rows, 1.1225436E7 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[=($0, $28)], joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost = {7.884434212216867E8 rows, 5035701.0 cpu, 0.0 io}\n" - + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[AND(=($31, $79), =($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative cost = {7.0048032234040472E16 rows, 1.0494946E7 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[=($0, $28)], joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost = {7.884434212216867E8 rows, 4670456.0 cpu, 0.0 io}\n" + + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15, $t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE_RETURNS]]): rowcount = 287514.0, cumulative cost = {287514.0 rows, 287515.0 cpu, 0.0 io}\n" - + " EnumerableHashJoin(condition=[=($0, $28)], joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost = {3.9520401026966867E9 rows, 6189735.0 cpu, 0.0 io}\n" - + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n" + + " EnumerableHashJoin(condition=[=($0, $28)], joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost = {3.9520401026966867E9 rows, 5824490.0 cpu, 0.0 io}\n" + + " EnumerableCalc(expr#0..27=[{inputs}], expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15, $t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, CATALOG_SALES]]): rowcount = 1441548.0, cumulative cost = {1441548.0 rows, 1441549.0 cpu, 0.0 io}\n")); } @@ -367,15 +366,13 @@ class TpcdsTest { builder.equals(builder.field("CD_EDUCATION_STATUS"), builder.literal("HIGH SCHOOL")), builder.equals(builder.field("D_YEAR"), builder.literal(1998)), - builder.call(SqlStdOperatorTable.IN, - builder.field("S_STATE"), - builder.call(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, - builder.literal("CA"), - builder.literal("OR"), - builder.literal("WA"), - builder.literal("TX"), - builder.literal("OK"), - builder.literal("MD")))) + builder.in(builder.field("S_STATE"), + builder.literal("CA"), + builder.literal("OR"), + builder.literal("WA"), + builder.literal("TX"), + builder.literal("OK"), + builder.literal("MD"))) .aggregate(builder.groupKey("I_ITEM_ID", "S_STATE"), builder.avg(false, "AGG1", builder.field("SS_QUANTITY")), builder.avg(false, "AGG2", builder.field("SS_LIST_PRICE")), @@ -386,7 +383,12 @@ class TpcdsTest { String expectResult = "" + "LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100])\n" + " LogicalAggregate(group=[{84, 90}], AGG1=[AVG($10)], AGG2=[AVG($12)], AGG3=[AVG($19)], AGG4=[AVG($13)])\n" - + " LogicalFilter(condition=[AND(=($0, $32), =($2, $89), =($7, $60), =($4, $23), =($24, 'M'), =($25, 'S'), =($26, 'HIGH SCHOOL'), =($38, 1998), IN($84, ARRAY('CA', 'OR', 'WA', 'TX', 'OK', 'MD')))])\n" + + " LogicalFilter(condition=[AND(=($0, $32), =($2, $89), " + + "=($7, $60), =($4, $23), SEARCH($24, Sarg['M']:CHAR(1)), " + + "SEARCH($25, Sarg['S']:CHAR(1)), " + + "SEARCH($26, Sarg['HIGH SCHOOL']:CHAR(11)), " + + "SEARCH($38, Sarg[1998]), SEARCH($84, Sarg['CA', 'MD', 'OK', 'OR', " + + "'TX', 'WA']:CHAR(2)))])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalTableScan(table=[[TPCDS, STORE_SALES]])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n"