This is an automated email from the ASF dual-hosted git repository.
jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 60aa74f [CALCITE-4237] Following [CALCITE-4173], fix assertion error
when RexSimplify generates Sarg with single null only
60aa74f is described below
commit 60aa74f4e5b3b01ad90cb9ee9a4520483d45b2b4
Author: Vladimir Sitnikov <[email protected]>
AuthorDate: Sun Sep 6 19:19:02 2020 +0300
[CALCITE-4237] Following [CALCITE-4173], fix assertion error when
RexSimplify generates Sarg with single null only
Fix TpcdsTest expected output.
Add a test for IS NULL Sarg (Julian Hyde).
Close apache/calcite#2144
Close apache/calcite#2141
---
.../java/org/apache/calcite/rex/RexSimplify.java | 16 ++++--
.../org/apache/calcite/rex/RexProgramTest.java | 15 ++++++
.../apache/calcite/adapter/tpcds/TpcdsTest.java | 58 +++++++++++-----------
3 files changed, 57 insertions(+), 32 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
index 464825e..63b3a32 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
@@ -2543,8 +2543,9 @@ public class RexSimplify {
if (negate) {
return false;
}
- return accept1(((RexCall) e).operands.get(0), e.getKind(),
- rexBuilder.makeNullLiteral(e.getType()), newTerms);
+ final RexNode arg = ((RexCall) e).operands.get(0);
+ return accept1(arg, e.getKind(),
+ rexBuilder.makeNullLiteral(arg.getType()), newTerms);
default:
return false;
}
@@ -2612,7 +2613,7 @@ public class RexSimplify {
return true;
case IS_NULL:
if (negate) {
- throw new AssertionError();
+ throw new AssertionError("negate is not supported for IS_NULL");
}
b.containsNull = true;
return true;
@@ -2690,7 +2691,14 @@ public class RexSimplify {
}
@Override public RelDataType getType() {
- return rexBuilder.typeFactory.leastRestrictive(Util.distinctList(types));
+ if (this.types.isEmpty()) {
+ // Expression is "x IS NULL"
+ return ref.getType();
+ }
+ final List<RelDataType> distinctTypes = Util.distinctList(this.types);
+ return Objects.requireNonNull(
+ rexBuilder.typeFactory.leastRestrictive(distinctTypes),
+ () -> "Can't find leastRestrictive type among " + distinctTypes);
}
@Override public <R> R accept(RexVisitor<R> visitor) {
diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
index 1b836f2..b888834 100644
--- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
+++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
@@ -1640,6 +1640,21 @@ class RexProgramTest extends RexProgramTestBase {
.expandedSearch(expanded);
}
+ @Test void testSimplifyRange6() {
+ // An IS NULL condition would not usually become a Sarg,
+ // but here it is combined with another condition, and together they cross
+ // the complexity threshold.
+ final RexNode aRef = input(tInt(true), 0);
+ final RexNode bRef = input(tInt(true), 1);
+ // a in (1, 2) or b is null
+ RexNode expr = or(eq(aRef, literal(1)), eq(aRef, literal(2)),
isNull(bRef));
+ final String simplified =
+ "OR(SEARCH($1, Sarg[, null]), SEARCH($0, Sarg[1, 2]))";
+ final String expanded = "OR(IS NULL($1), OR(=($0, 1), =($0, 2)))";
+ checkSimplify(expr, simplified)
+ .expandedSearch(expanded);
+ }
+
@Test void testSimplifyItemRangeTerms() {
RexNode item = item(input(tArray(tInt()), 3), literal(1));
// paranoid validation doesn't support array types, disable it for a moment
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
index dec8206..02295a3 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpcds/TpcdsTest.java
@@ -21,7 +21,6 @@ import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.schema.SchemaPlus;
-import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.test.CalciteAssert;
import org.apache.calcite.tools.Frameworks;
@@ -220,27 +219,27 @@ class TpcdsTest {
.withHook(Hook.PROGRAM, handler(true, 2))
.explainMatches("including all attributes ",
CalciteAssert.checkMaskedResultContains(""
- + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)],
expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12,
$t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)],
expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17,
$t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)],
expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL],
expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...]
- + " EnumerableLimit(fetch=[100]): rowcount = 100.0,
cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0
io}\n"
- + " EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2],
dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 5.434029018852197E26,
cumulative cost = {1.2435775409784036E28 rows, 2.555295485909236E30 cpu, 0.0
io}\n"
- + " EnumerableAggregate(group=[{0, 1, 2}],
STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)],
agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]):
rowcount = 5.434029018852197E26, cumulative cost = {1.1892372507898816E28 rows,
1.2172225002228922E30 cpu, 0.0 io}\n"
- + " EnumerableCalc(expr#0..211=[{inputs}],
expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196,
$t196)], I_ITEM_ID=[$t58], I_ITEM_DESC=[$t61], S_STATE=[$t24],
SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196],
$f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 5.434029018852197E27,
cumulative cost = {1.0873492066864028E28 rows, 1.2172225002228922E30 cpu, 0.0
io}\n"
- + " EnumerableHashJoin(condition=[AND(=($82, $133),
=($81, $132), =($88, $139))], joinType=[inner]): rowcount =
5.434029018852197E27, cumulative cost = {5.439463048011832E27 rows, 1.8506796E7
cpu, 0.0 io}\n"
- + " EnumerableHashJoin(condition=[=($0, $86)],
joinType=[inner]): rowcount = 2.3008402586892598E13, cumulative cost =
{4.8588854672854766E13 rows, 7281360.0 cpu, 0.0 io}\n"
+ + "EnumerableCalc(expr#0..9=[{inputs}], expr#10=[/($t4, $t3)],
expr#11=[CAST($t10):INTEGER NOT NULL], expr#12=[*($t4, $t4)], expr#13=[/($t12,
$t3)], expr#14=[-($t5, $t13)], expr#15=[1], expr#16=[=($t3, $t15)],
expr#17=[null:BIGINT], expr#18=[-($t3, $t15)], expr#19=[CASE($t16, $t17,
$t18)], expr#20=[/($t14, $t19)], expr#21=[0.5:DECIMAL(2, 1)],
expr#22=[POWER($t20, $t21)], expr#23=[CAST($t22):INTEGER NOT NULL],
expr#24=[/($t23, $t11)], expr#25=[/($t6, $t3)], expr#26=[CAST($ [...]
+ + " EnumerableLimit(fetch=[100]): rowcount = 100.0,
cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0
io}\n"
+ + " EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2],
dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 9.056715031420328E26,
cumulative cost = {2.0726292349546726E28 rows, 4.277331418269512E30 cpu, 0.0
io}\n"
+ + " EnumerableAggregate(group=[{0, 1, 2}],
STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[$SUM0($3)], agg#2=[$SUM0($6)],
agg#3=[$SUM0($4)], agg#4=[$SUM0($7)], agg#5=[$SUM0($5)], agg#6=[$SUM0($8)]):
rowcount = 9.056715031420328E26, cumulative cost = {1.982062084640469E28 rows,
2.0287041670381534E30 cpu, 0.0 io}\n"
+ + " EnumerableCalc(expr#0..211=[{inputs}],
expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196,
$t196)], I_ITEM_ID=[$t30], I_ITEM_DESC=[$t33], S_STATE=[$t24],
SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196],
$f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 9.056715031420328E27,
cumulative cost = {1.8122486778013382E28 rows, 2.0287041670381534E30 cpu, 0.0
io}\n"
+ + " EnumerableHashJoin(condition=[AND(=($82, $133),
=($81, $132), =($88, $139))], joinType=[inner]): rowcount =
9.056715031420328E27, cumulative cost = {9.065771746593055E27 rows, 1.7776306E7
cpu, 0.0 io}\n"
+ + " EnumerableHashJoin(condition=[=($0, $86)],
joinType=[inner]): rowcount = 3.8347337644821E13, cumulative cost =
{8.097127760662506E13 rows, 7281360.0 cpu, 0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS, STORE]]):
rowcount = 12.0, cumulative cost = {12.0 rows, 13.0 cpu, 0.0 io}\n"
- + " EnumerableHashJoin(condition=[=($0, $50)],
joinType=[inner]): rowcount = 1.2782445881607E13, cumulative cost =
{1.279800620431234E13 rows, 7281347.0 cpu, 0.0 io}\n"
- + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], proj#0..27=[{exprs}],
$condition=[$t29]): rowcount = 10957.35, cumulative cost = {84006.35 rows,
4382941.0 cpu, 0.0 io}\n"
- + " EnumerableTableScan(table=[[TPCDS,
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu,
0.0 io}\n"
- + " EnumerableHashJoin(condition=[=($0, $24)],
joinType=[inner]): rowcount = 7.7770908E9, cumulative cost =
{7.783045975286664E9 rows, 2898406.0 cpu, 0.0 io}\n"
- + " EnumerableTableScan(table=[[TPCDS,
ITEM]]): rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0
io}\n"
+ + " EnumerableHashJoin(condition=[=($0, $52)],
joinType=[inner]): rowcount = 2.1304076469345E13, cumulative cost =
{2.1319863492416234E13 rows, 7281347.0 cpu, 0.0 io}\n"
+ + " EnumerableTableScan(table=[[TPCDS, ITEM]]):
rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 io}\n"
+ + " EnumerableHashJoin(condition=[=($0, $28)],
joinType=[inner]): rowcount = 7.890398692349999E9, cumulative cost =
{7.896430011596686E9 rows, 7263346.0 cpu, 0.0 io}\n"
+ + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=[Sarg['1998Q1']:CHAR(6)], expr#29=[SEARCH($t15, $t28)],
proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25, cumulative cost
= {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
+ + " EnumerableTableScan(table=[[TPCDS,
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu,
0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS,
STORE_SALES]]): rowcount = 2880404.0, cumulative cost = {2880404.0 rows,
2880405.0 cpu, 0.0 io}\n"
- + " EnumerableHashJoin(condition=[AND(=($31, $79),
=($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative
cost = {7.0048032234040472E16 rows, 1.1225436E7 cpu, 0.0 io}\n"
- + " EnumerableHashJoin(condition=[=($0, $28)],
joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost =
{7.884434212216867E8 rows, 5035701.0 cpu, 0.0 io}\n"
- + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'],
expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)],
expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]):
rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n"
+ + " EnumerableHashJoin(condition=[AND(=($31, $79),
=($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative
cost = {7.0048032234040472E16 rows, 1.0494946E7 cpu, 0.0 io}\n"
+ + " EnumerableHashJoin(condition=[=($0, $28)],
joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost =
{7.884434212216867E8 rows, 4670456.0 cpu, 0.0 io}\n"
+ + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15,
$t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25,
cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS,
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu,
0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS,
STORE_RETURNS]]): rowcount = 287514.0, cumulative cost = {287514.0 rows,
287515.0 cpu, 0.0 io}\n"
- + " EnumerableHashJoin(condition=[=($0, $28)],
joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost =
{3.9520401026966867E9 rows, 6189735.0 cpu, 0.0 io}\n"
- + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'],
expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)],
expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]):
rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n"
+ + " EnumerableHashJoin(condition=[=($0, $28)],
joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost =
{3.9520401026966867E9 rows, 5824490.0 cpu, 0.0 io}\n"
+ + " EnumerableCalc(expr#0..27=[{inputs}],
expr#28=[Sarg['1998Q1', '1998Q2', '1998Q3']:CHAR(6)], expr#29=[SEARCH($t15,
$t28)], proj#0..27=[{exprs}], $condition=[$t29]): rowcount = 18262.25,
cumulative cost = {91311.25 rows, 4382941.0 cpu, 0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS,
DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu,
0.0 io}\n"
+ " EnumerableTableScan(table=[[TPCDS,
CATALOG_SALES]]): rowcount = 1441548.0, cumulative cost = {1441548.0 rows,
1441549.0 cpu, 0.0 io}\n"));
}
@@ -367,15 +366,13 @@ class TpcdsTest {
builder.equals(builder.field("CD_EDUCATION_STATUS"),
builder.literal("HIGH SCHOOL")),
builder.equals(builder.field("D_YEAR"), builder.literal(1998)),
- builder.call(SqlStdOperatorTable.IN,
- builder.field("S_STATE"),
- builder.call(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR,
- builder.literal("CA"),
- builder.literal("OR"),
- builder.literal("WA"),
- builder.literal("TX"),
- builder.literal("OK"),
- builder.literal("MD"))))
+ builder.in(builder.field("S_STATE"),
+ builder.literal("CA"),
+ builder.literal("OR"),
+ builder.literal("WA"),
+ builder.literal("TX"),
+ builder.literal("OK"),
+ builder.literal("MD")))
.aggregate(builder.groupKey("I_ITEM_ID", "S_STATE"),
builder.avg(false, "AGG1", builder.field("SS_QUANTITY")),
builder.avg(false, "AGG2", builder.field("SS_LIST_PRICE")),
@@ -386,7 +383,12 @@ class TpcdsTest {
String expectResult = ""
+ "LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC],
fetch=[100])\n"
+ " LogicalAggregate(group=[{84, 90}], AGG1=[AVG($10)],
AGG2=[AVG($12)], AGG3=[AVG($19)], AGG4=[AVG($13)])\n"
- + " LogicalFilter(condition=[AND(=($0, $32), =($2, $89), =($7,
$60), =($4, $23), =($24, 'M'), =($25, 'S'), =($26, 'HIGH SCHOOL'), =($38,
1998), IN($84, ARRAY('CA', 'OR', 'WA', 'TX', 'OK', 'MD')))])\n"
+ + " LogicalFilter(condition=[AND(=($0, $32), =($2, $89), "
+ + "=($7, $60), =($4, $23), SEARCH($24, Sarg['M']:CHAR(1)), "
+ + "SEARCH($25, Sarg['S']:CHAR(1)), "
+ + "SEARCH($26, Sarg['HIGH SCHOOL']:CHAR(11)), "
+ + "SEARCH($38, Sarg[1998]), SEARCH($84, Sarg['CA', 'MD', 'OK', 'OR', "
+ + "'TX', 'WA']:CHAR(2)))])\n"
+ " LogicalJoin(condition=[true], joinType=[inner])\n"
+ " LogicalTableScan(table=[[TPCDS, STORE_SALES]])\n"
+ " LogicalJoin(condition=[true], joinType=[inner])\n"