This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 090e3a6c7f70339c3a3549856de3bfcc05ea1936
Author: Bowen Song <bowen.s...@kyligence.io>
AuthorDate: Sun Nov 20 15:23:41 2022 +0800

    KYLIN-5412 add condition to calcite project merge rule to fix OOM
---
 pom.xml                                            |  2 +-
 .../org/apache/kylin/common/KylinConfigBase.java   | 12 +++++--
 .../kylin/query/engine/SqlToRelNodeTest.java       | 37 ++++++++++++++++++++++
 .../apache/kylin/query/engine/SqlToRelNodeTest.xml | 26 +++++++++++++++
 .../kylin/query/engine/SumExprPlannerTest.xml      | 36 +++++++++++----------
 .../kap/query/optrule/KapProjectMergeRule.java     | 14 ++++++--
 6 files changed, 106 insertions(+), 21 deletions(-)

diff --git a/pom.xml b/pom.xml
index 5b85bc60bf..f9304728e8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -73,7 +73,7 @@
         <scala-retry>0.3.0</scala-retry>
 
         <!-- Calcite Version -->
-        <calcite.version>1.116.0-kylin-4.x-r023</calcite.version>
+        <calcite.version>1.116.0-kylin-4.x-r024</calcite.version>
         <avatica.version>4.x_1.10-r01</avatica.version>
 
         <!-- Hadoop Common deps, keep compatible with hadoop2.version -->
diff --git a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 95fa963d5e..4405eede60 100644
--- a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -3712,11 +3712,19 @@ public abstract class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.second-storage.wait-lock-timeout", "180"));
     }
 
+    public boolean getDDLEnabled() {
+        return Boolean.parseBoolean(getOptional("kylin.source.ddl.enabled", FALSE));
+    }
+
     public boolean isBuildSegmentOverlapEnabled() {
         return Boolean.parseBoolean(getOptional("kylin.build.segment-overlap-enabled", FALSE));
     }
 
-    public boolean getDDLEnabled() {
-        return Boolean.parseBoolean(getOptional("kylin.source.ddl.enabled", FALSE));
+    public boolean isProjectMergeWithBloatEnabled() {
+        return Boolean.parseBoolean(getOptional("kylin.query.kap-project-merge-with-bloat-enabled", "true"));
+    }
+
+    public int getKapProjectMergeRuleBloatThreshold() {
+        return Integer.parseInt(getOptional("kylin.query.kap-project-merge-bloat-threshold", "0"));
     }
 }
diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java
index 09a6ff74ad..64f5ea16d2 100644
--- a/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java
+++ b/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java
@@ -84,6 +84,43 @@ public class SqlToRelNodeTest extends CalciteRuleTestBase {
         checkSQLOptimize(PROJECT, sql.getSecond(), "query_sql_in_query02");
     }
 
+    @Test
+    public void testProjectMergeWithBloat() throws Exception {
+        String sql = "select q.x + q.x from( select (p.v + p.v) as x from (select (case when TRANS_ID > 60 then 1 else 0 end) v from test_kylin_fact) p)q";
+
+        try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config)) {
+            config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "false");
+            QueryExec exec1 = new QueryExec(PROJECT, config);
+            RelRoot relRoot = exec1.sqlToRelRoot(sql);
+            RelNode rel = exec1.optimize(relRoot).rel;
+            String realPlan = NL + RelOptUtil.toString(rel);
+            diff.assertEquals("bloat_merge_sql.bloat_disabled", "${bloat_merge_sql.bloat_disabled}", realPlan);
+
+            config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "true");
+            QueryExec exec2 = new QueryExec(PROJECT, config);
+            relRoot = exec2.sqlToRelRoot(sql);
+            rel = exec2.optimize(relRoot).rel;
+            realPlan = NL + RelOptUtil.toString(rel);
+            diff.assertEquals("bloat_merge_sql.bloat_enabled", "${bloat_merge_sql.bloat_enabled}", realPlan);
+
+            config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "true");
+            config.setProperty("kylin.query.kap-project-merge-bloat-threshold", "5");
+            QueryExec exec3 = new QueryExec(PROJECT, config);
+            relRoot = exec3.sqlToRelRoot(sql);
+            rel = exec3.optimize(relRoot).rel;
+            realPlan = NL + RelOptUtil.toString(rel);
+            diff.assertEquals("bloat_merge_sql.bloat_enabled_bloat_5", "${bloat_merge_sql.bloat_enabled_bloat_5}", realPlan);
+
+            config.setProperty("kylin.query.kap-project-merge-bloat-threshold", "100");
+            QueryExec exec4 = new QueryExec(PROJECT, config);
+            relRoot = exec4.sqlToRelRoot(sql);
+            rel = exec4.optimize(relRoot).rel;
+            realPlan = NL + RelOptUtil.toString(rel);
+            diff.assertEquals("bloat_merge_sql.bloat_disabled", "${bloat_merge_sql.bloat_disabled}", realPlan);
+        }
+
+    }
+
     /**
      * Visitor that checks that every {@link RelNode} in a tree is valid.
      *
diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml
index 3b02bd2722..dca32e385e 100644
--- a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml
+++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml
@@ -144,6 +144,32 @@ KapOLAPToEnumerableConverter
     KapProjectRel(LSTG_FORMAT_NAME=[$3], ctx=[])
       KapFilterRel(condition=[OR(=($3, null), =($3, 'FP-GTC'))], ctx=[])
         KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+]]>
+        </Resource>
+    </TestCase>
+    <TestCase name="testProjectMergeWithBloat">
+        <Resource name="bloat_merge_sql.bloat_disabled">
+            <![CDATA[
+KapOLAPToEnumerableConverter
+  KapProjectRel(EXPR$0=[+(+(CASE(>($0, 60), 1, 0), CASE(>($0, 60), 1, 0)), +(CASE(>($0, 60), 1, 0), CASE(>($0, 60), 1, 0)))], ctx=[])
+    KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+]]>
+        </Resource>
+        <Resource name="bloat_merge_sql.bloat_enabled">
+            <![CDATA[
+KapOLAPToEnumerableConverter
+  KapProjectRel(EXPR$0=[+($0, $0)], ctx=[])
+    KapProjectRel(X=[+($0, $0)], ctx=[])
+      KapProjectRel(V=[CASE(>($0, 60), 1, 0)], ctx=[])
+        KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+]]>
+        </Resource>
+        <Resource name="bloat_merge_sql.bloat_enabled_bloat_5">
+            <![CDATA[
+KapOLAPToEnumerableConverter
+  KapProjectRel(EXPR$0=[+(+($0, $0), +($0, $0))], ctx=[])
+    KapProjectRel(V=[CASE(>($0, 60), 1, 0)], ctx=[])
+      KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
 ]]>
         </Resource>
     </TestCase>
diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml
index 1c5a41e368..e7b560f9a5 100644
--- a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml
+++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml
@@ -361,16 +361,18 @@ KapOLAPToEnumerableConverter
   KapUnionRel(all=[true], ctx=[], all=[true])
     KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[])
       KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[])
-        KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], $f3=[*(/($1, $2), /($1, $2))], PRI=[/($1, $2)], ctx=[])
-          KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[])
-            KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
-              KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+        KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[])
+          KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], PRI=[/($1, $2)], ctx=[])
+            KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[])
+              KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
+                KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
     KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[])
       KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[])
-        KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], $f3=[*(/($2, $3), /($2, $3))], PRI=[/($2, $3)], ctx=[])
-          KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[])
-            KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
-              KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+        KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[])
+          KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], PRI=[/($2, $3)], ctx=[])
+            KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[])
+              KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
+                KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
 ]]>
         </Resource>
         <Resource name="query11.planAfter">
@@ -379,16 +381,18 @@ KapOLAPToEnumerableConverter
   KapUnionRel(all=[true], ctx=[], all=[true])
     KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[])
       KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[])
-        KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], $f3=[*(/($1, $2), /($1, $2))], PRI=[/($1, $2)], ctx=[])
-          KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[])
-            KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
-              KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+        KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[])
+          KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], PRI=[/($1, $2)], ctx=[])
+            KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[])
+              KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
+                KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
     KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[])
       KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[])
-        KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], $f3=[*(/($2, $3), /($2, $3))], PRI=[/($2, $3)], ctx=[])
-          KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[])
-            KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
-              KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
+        KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[])
+          KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], PRI=[/($2, $3)], ctx=[])
+            KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[])
+              KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[])
+                KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]])
 ]]>
         </Resource>
         <Resource name="query12.planBefore">
diff --git a/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java b/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java
index f163ad7caa..8aefc1d645 100644
--- a/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java
+++ b/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java
@@ -36,6 +36,7 @@ import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
 import org.apache.calcite.util.Permutation;
+import org.apache.kylin.common.KylinConfig;
 
 import com.google.common.collect.Sets;
 
@@ -54,7 +55,6 @@ import com.google.common.collect.Sets;
  *
  */
 public class KapProjectMergeRule extends RelOptRule {
-
     public static final KapProjectMergeRule INSTANCE = new KapProjectMergeRule(true, RelFactories.LOGICAL_BUILDER);
 
     //~ Instance fields --------------------------------------------------------
@@ -120,7 +120,17 @@ public class KapProjectMergeRule extends RelOptRule {
             return;
         }
 
-        final List<RexNode> pushedProjects = RelOptUtil.pushPastProject(topProject.getProjects(), bottomProject);
+        final List<RexNode> pushedProjects;
+        if (KylinConfig.getInstanceFromEnv().isProjectMergeWithBloatEnabled()) {
+            pushedProjects = RelOptUtil.pushPastProjectUnlessBloat(topProject.getProjects(),
+                    bottomProject, KylinConfig.getInstanceFromEnv().getKapProjectMergeRuleBloatThreshold());
+            if (pushedProjects == null) {
+                // Merged projects are significantly more complex. Do not merge.
+                return;
+            }
+        } else {
+            pushedProjects = RelOptUtil.pushPastProject(topProject.getProjects(), bottomProject);
+        }
         final List<RexNode> newProjects = simplify(pushedProjects);
         final RelNode input = bottomProject.getInput();
         if (RexUtil.isIdentity(newProjects, input.getRowType())

Reply via email to