This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 60aa74f  [CALCITE-4237] Following [CALCITE-4173], fix assertion error 
when RexSimplify generates Sarg with single null only
60aa74f is described below

commit 60aa74f4e5b3b01ad90cb9ee9a4520483d45b2b4
Author: Vladimir Sitnikov <sitnikov.vladi...@gmail.com>
AuthorDate: Sun Sep 6 19:19:02 2020 +0300

    [CALCITE-4237] Following [CALCITE-4173], fix assertion error when 
RexSimplify generates Sarg with single null only
    
    Fix TpcdsTest expected output.
    
    Add a test for IS NULL Sarg (Julian Hyde).
    
    Close apache/calcite#2144
    Close apache/calcite#2141
---
 .../java/org/apache/calcite/rex/RexSimplify.java   | 16 ++++--
 .../org/apache/calcite/rex/RexProgramTest.java     | 15 ++++++
 .../apache/calcite/adapter/tpcds/TpcdsTest.java    | 58 +++++++++++-----------
 3 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java 
b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
index 464825e..63b3a32 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
@@ -2543,8 +2543,9 @@ public class RexSimplify {
         if (negate) {
           return false;
         }
-        return accept1(((RexCall) e).operands.get(0), e.getKind(),
-            rexBuilder.makeNullLiteral(e.getType()), newTerms);
+        final RexNode arg = ((RexCall) e).operands.get(0);
+        return accept1(arg, e.getKind(),
+            rexBuilder.makeNullLiteral(arg.getType()), newTerms);
       default:
         return false;
       }
@@ -2612,7 +2613,7 @@ public class RexSimplify {
         return true;
       case IS_NULL:
         if (negate) {
-          throw new AssertionError();
+          throw new AssertionError("negate is not supported for IS_NULL");
         }
         b.containsNull = true;
         return true;
@@ -2690,7 +2691,14 @@ public class RexSimplify {
     }
 
     @Override public RelDataType getType() {
-      return rexBuilder.typeFactory.leastRestrictive(Util.distinctList(types));
+      if (this.types.isEmpty()) {
+        // Expression is "x IS NULL"
+        return ref.getType();
+      }
+      final List<RelDataType> distinctTypes = Util.distinctList(this.types);
+      return Objects.requireNonNull(
+          rexBuilder.typeFactory.leastRestrictive(distinctTypes),
+          () -> "Can't find leastRestrictive type among " + distinctTypes);
     }
 
     @Override public <R> R accept(RexVisitor<R> visitor) {
diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java 
b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
index 1b836f2..b888834 100644
--- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
+++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
@@ -1640,6 +1640,21 @@ class RexProgramTest extends RexProgramTestBase {
         .expandedSearch(expanded);
   }
 
+  @Test void testSimplifyRange6() {
+    // An IS NULL condition would not usually become a Sarg,
+    // but here it is combined with another condition, and together they cross
+    // the complexity threshold.
+    final RexNode aRef = input(tInt(true), 0);
+    final RexNode bRef = input(tInt(true), 1);
+    // a in (1, 2) or b is null
+    RexNode expr = or(eq(aRef, literal(1)), eq(aRef, literal(2)), 
isNull(bRef));
+    final String simplified =
+        "OR(SEARCH($1, Sarg[, null]), SEARCH($0, Sarg[1, 2]))";
+    final String expanded = "OR(IS NULL($1), OR(=($0, 1), =($0, 2)))";
+    checkSimplify(expr, simplified)
+        .expandedSearch(expanded);
+  }
+
   @Test void testSimplifyItemRangeTerms() {
     RexNode item = item(input(tArray(tInt()), 3), literal(1));
     // paranoid validation doesn't support array types, disable it for a moment
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java 
b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
index dec8206..02295a3 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
@@ -21,7 +21,6 @@ import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.runtime.Hook;
 import org.apache.calcite.schema.SchemaPlus;
-import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParser;
 import org.apache.calcite.test.CalciteAssert;
 import org.apache.calcite.tools.Frameworks;
@@ -220,27 +219,27 @@ class TpcdsTest {
         .withHook(Hook.PROGRAM, handler(true, 2))
         .explainMatches("including all attributes ",
             CalciteAssert.checkMaskedResultContains(""
-                + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)], 
expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12, 
$t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)], 
expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17, 
$t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)], 
expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL], 
expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...]
-                + "  EnumerableLimit(fetch=[100]): rowcount = 100.0, 
cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0 
io}\n"
-                + "    EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], 
dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 5.434029018852197E26, 
cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0 
io}\n"
-                + "      EnumerableAggregate(group=[{0, 1, 2}], 
STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)], 
agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]): 
rowcount = 5.434029018852197E26, cumulative cost = {1.1892372507898816E28 rows, 
1.2172225002228922E30 cpu, 0.0 io}\n"
-                + "        EnumerableCalc(expr#0..211=[{inputs}], 
expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196, 
$t196)], I_ITEM_ID=[$t58], I_ITEM_DESC=[$t61], S_STATE=[$t24], 
SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196], 
$f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 5.434029018852197E27, 
cumulative cost = {1.0873492066864028E28 rows, 1.2172225002228922E30 cpu, 0.0 
io}\n"
-                + "          EnumerableHashJoin(condition=[AND(=($82, $133), 
=($81, $132), =($88, $139))], joinType=[inner]): rowcount = 
5.434029018852197E27, cumulative cost = {5.439463048011832E27 rows, 1.8506796E7 
cpu, 0.0 io}\n"
-                + "            EnumerableHashJoin(condition=[=($0, $86)], 
joinType=[inner]): rowcount = 2.3008402586892598E13, cumulative cost = 
{4.8588854672854766E13 rows, 7281360.0 cpu, 0.0 io}\n"
+                + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)], 
expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12, 
$t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)], 
expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17, 
$t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)], 
expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL], 
expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...]
+                + "  EnumerableLimit(fetch=[100]): rowcount = 100.0, 
cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0 
io}\n"
+                + "    EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], 
dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 9.056715031420328E26, 
cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0 
io}\n"
+                + "      EnumerableAggregate(group=[{0, 1, 2}], 
STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)], 
agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]): 
rowcount = 9.056715031420328E26, cumulative cost = {1.982062084640469E28 rows, 
2.0287041670381534E30 cpu, 0.0 io}\n"
+                + "        EnumerableCalc(expr#0..211=[{inputs}], 
expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196, 
$t196)], I_ITEM_ID=[$t30], I_ITEM_DESC=[$t33], S_STATE=[$t24], 
SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196], 
$f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 9.056715031420328E27, 
cumulative cost = {1.8122486778013382E28 rows, 2.0287041670381534E30 cpu, 0.0 
io}\n"
+                + "          EnumerableHashJoin(condition=[AND(=($82, $133), 
=($81, $132), =($88, $139))], joinType=[inner]): rowcount = 
9.056715031420328E27, cumulative cost = {9.065771746593055E27 rows, 1.7776306E7 
cpu, 0.0 io}\n"
+                + "            EnumerableHashJoin(condition=[=($0, $86)], 
joinType=[inner]): rowcount = 3.8347337644821E13, cumulative cost = 
{8.097127760662506E13 rows, 7281360.0 cpu, 0.0 io}\n"
                 + "              EnumerableTableScan(table=[[TPCDS, STORE]]): 
rowcount = 12.0, cumulative cost = {12.0 rows, 13.0 cpu, 0.0 io}\n"
-                + "              EnumerableHashJoin(condition=[=($0, $50)], 
joinType=[inner]): rowcount = 1.2782445881607E13, cumulative cost = 
{1.279800620431234E13 rows, 7281347.0 cpu, 0.0 io}\n"
-                + "                EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], proj#0..27=[{exprs}], 
$condition=[$t29]): rowcount = 10957.35, cumulative cost = {84006.35 rows, 
4382941.0 cpu, 0.0 io}\n"
-                + "                  EnumerableTableScan(table=[[TPCDS, 
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 
0.0 io}\n"
-                + "                EnumerableHashJoin(condition=[=($0, $24)], 
joinType=[inner]): rowcount = 7.7770908E9, cumulative cost = 
{7.783045975286664E9 rows, 2898406.0 cpu, 0.0 io}\n"
-                + "                  EnumerableTableScan(table=[[TPCDS, 
ITEM]]): rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 
io}\n"
+                + "              EnumerableHashJoin(condition=[=($0, $52)], 
joinType=[inner]): rowcount = 2.1304076469345E13, cumulative cost = 
{2.1319863492416234E13 rows, 7281347.0 cpu, 0.0 io}\n"
+                + "                EnumerableTableScan(table=[[TPCDS, ITEM]]): 
rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 io}\n"
+                + "                EnumerableHashJoin(condition=[=($0, $28)], 
joinType=[inner]): rowcount = 7.890398692349999E9, cumulative cost = 
{7.896430011596686E9 rows, 7263346.0 cpu, 0.0 io}\n"
+                + "                  EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=[Sarg['1998Q1']:CHAR(6)], expr#29=[SEARCH($t15, $t28)], 
proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, cumulative cost 
= {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
+                + "                    EnumerableTableScan(table=[[TPCDS, 
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 
0.0 io}\n"
                 + "                  EnumerableTableScan(table=[[TPCDS, 
STORE_SALES]]): rowcount = 2880404.0, cumulative cost = {2880404.0 rows, 
2880405.0 cpu, 0.0 io}\n"
-                + "            EnumerableHashJoin(condition=[AND(=($31, $79), 
=($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative 
cost = {7.0048032234040472E16 rows, 1.1225436E7 cpu, 0.0 io}\n"
-                + "              EnumerableHashJoin(condition=[=($0, $28)], 
joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost = 
{7.884434212216867E8 rows, 5035701.0 cpu, 0.0 io}\n"
-                + "                EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], 
expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], 
expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): 
rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n"
+                + "            EnumerableHashJoin(condition=[AND(=($31, $79), 
=($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative 
cost = {7.0048032234040472E16 rows, 1.0494946E7 cpu, 0.0 io}\n"
+                + "              EnumerableHashJoin(condition=[=($0, $28)], 
joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost = 
{7.884434212216867E8 rows, 4670456.0 cpu, 0.0 io}\n"
+                + "                EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15, 
$t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, 
cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
                 + "                  EnumerableTableScan(table=[[TPCDS, 
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 
0.0 io}\n"
                 + "                EnumerableTableScan(table=[[TPCDS, 
STORE_RETURNS]]): rowcount = 287514.0, cumulative cost = {287514.0 rows, 
287515.0 cpu, 0.0 io}\n"
-                + "              EnumerableHashJoin(condition=[=($0, $28)], 
joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost = 
{3.9520401026966867E9 rows, 6189735.0 cpu, 0.0 io}\n"
-                + "                EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], 
expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], 
expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): 
rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n"
+                + "              EnumerableHashJoin(condition=[=($0, $28)], 
joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost = 
{3.9520401026966867E9 rows, 5824490.0 cpu, 0.0 io}\n"
+                + "                EnumerableCalc(expr#0..27=[{inputs}], 
expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15, 
$t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, 
cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
                 + "                  EnumerableTableScan(table=[[TPCDS, 
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 
0.0 io}\n"
                 + "                EnumerableTableScan(table=[[TPCDS, 
CATALOG_SALES]]): rowcount = 1441548.0, cumulative cost = {1441548.0 rows, 
1441549.0 cpu, 0.0 io}\n"));
   }
@@ -367,15 +366,13 @@ class TpcdsTest {
                 builder.equals(builder.field("CD_EDUCATION_STATUS"),
                     builder.literal("HIGH SCHOOL")),
                 builder.equals(builder.field("D_YEAR"), builder.literal(1998)),
-                builder.call(SqlStdOperatorTable.IN,
-                    builder.field("S_STATE"),
-                    builder.call(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR,
-                        builder.literal("CA"),
-                        builder.literal("OR"),
-                        builder.literal("WA"),
-                        builder.literal("TX"),
-                        builder.literal("OK"),
-                        builder.literal("MD"))))
+                builder.in(builder.field("S_STATE"),
+                    builder.literal("CA"),
+                    builder.literal("OR"),
+                    builder.literal("WA"),
+                    builder.literal("TX"),
+                    builder.literal("OK"),
+                    builder.literal("MD")))
             .aggregate(builder.groupKey("I_ITEM_ID", "S_STATE"),
                 builder.avg(false, "AGG1", builder.field("SS_QUANTITY")),
                 builder.avg(false, "AGG2", builder.field("SS_LIST_PRICE")),
@@ -386,7 +383,12 @@ class TpcdsTest {
     String expectResult = ""
         + "LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], 
fetch=[100])\n"
         + "  LogicalAggregate(group=[{84, 90}], AGG1=[AVG($10)], 
AGG2=[AVG($12)], AGG3=[AVG($19)], AGG4=[AVG($13)])\n"
-        + "    LogicalFilter(condition=[AND(=($0, $32), =($2, $89), =($7, 
$60), =($4, $23), =($24, 'M'), =($25, 'S'), =($26, 'HIGH SCHOOL'), =($38, 
1998), IN($84, ARRAY('CA', 'OR', 'WA', 'TX', 'OK', 'MD')))])\n"
+        + "    LogicalFilter(condition=[AND(=($0, $32), =($2, $89), "
+        + "=($7, $60), =($4, $23), SEARCH($24, Sarg['M']:CHAR(1)), "
+        + "SEARCH($25, Sarg['S']:CHAR(1)), "
+        + "SEARCH($26, Sarg['HIGH SCHOOL']:CHAR(11)), "
+        + "SEARCH($38, Sarg[1998]), SEARCH($84, Sarg['CA', 'MD', 'OK', 'OR', "
+        + "'TX', 'WA']:CHAR(2)))])\n"
         + "      LogicalJoin(condition=[true], joinType=[inner])\n"
         + "        LogicalTableScan(table=[[TPCDS, STORE_SALES]])\n"
         + "        LogicalJoin(condition=[true], joinType=[inner])\n"

Reply via email to