This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new d6ed9cce842 branch-3.1: [feature](nereids) in predicate extract non constant expressions #46794 (#51987)
d6ed9cce842 is described below

commit d6ed9cce8426f1e4eeb775353b7f6a18ff40097c
Author: yujun <[email protected]>
AuthorDate: Mon Jun 23 19:29:17 2025 +0800

    branch-3.1: [feature](nereids) in predicate extract non constant expressions #46794 (#51987)
    
    Cherry-pick from #46794
---
 .../rules/expression/ExpressionNormalization.java  |  2 +
 .../rules/expression/rules/InPredicateDedup.java   | 16 ++---
 .../rules/InPredicateExtractNonConstant.java       | 75 ++++++++++++++++++++++
 .../rules/expression/rules/SimplifyRange.java      |  3 +-
 .../rules/expression/ExpressionRewriteTest.java    | 15 +++++
 .../rules/InPredicateExtractNonConstantTest.java   | 47 ++++++++++++++
 6 files changed, 149 insertions(+), 9 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java
index 0e52f2aaaf6..4d01970a7f7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java
@@ -22,6 +22,7 @@ import 
org.apache.doris.nereids.rules.expression.rules.ConvertAggStateCast;
 import org.apache.doris.nereids.rules.expression.rules.DigitalMaskingConvert;
 import org.apache.doris.nereids.rules.expression.rules.FoldConstantRule;
 import org.apache.doris.nereids.rules.expression.rules.InPredicateDedup;
+import 
org.apache.doris.nereids.rules.expression.rules.InPredicateExtractNonConstant;
 import 
org.apache.doris.nereids.rules.expression.rules.InPredicateToEqualToRule;
 import org.apache.doris.nereids.rules.expression.rules.MergeDateTrunc;
 import 
org.apache.doris.nereids.rules.expression.rules.NormalizeBinaryPredicatesRule;
@@ -46,6 +47,7 @@ public class ExpressionNormalization extends 
ExpressionRewrite {
                 SupportJavaDateFormatter.INSTANCE,
                 NormalizeBinaryPredicatesRule.INSTANCE,
                 InPredicateDedup.INSTANCE,
+                InPredicateExtractNonConstant.INSTANCE,
                 InPredicateToEqualToRule.INSTANCE,
                 SimplifyNotExprRule.INSTANCE,
                 SimplifyArithmeticRule.INSTANCE,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateDedup.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateDedup.java
index 3760dcf0e72..cc57b3ed30f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateDedup.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateDedup.java
@@ -35,28 +35,28 @@ import java.util.Set;
 public class InPredicateDedup implements ExpressionPatternRuleFactory {
     public static final InPredicateDedup INSTANCE = new InPredicateDedup();
 
+    // In many BI scenarios, the sql is auto-generated, and hence there may be thousands of options.
+    // It takes a long time to apply this rule. So set a threshold for the max number.
+    public static final int REWRITE_OPTIONS_MAX_SIZE = 200;
+
     @Override
     public List<ExpressionPatternMatcher<? extends Expression>> buildRules() {
         return ImmutableList.of(
-            matchesType(InPredicate.class).then(InPredicateDedup::dedup)
+            matchesType(InPredicate.class)
+                    .when(inPredicate -> inPredicate.getOptions().size() <= 
REWRITE_OPTIONS_MAX_SIZE)
+                    .then(InPredicateDedup::dedup)
         );
     }
 
     /** dedup */
     public static Expression dedup(InPredicate inPredicate) {
-        // In many BI scenarios, the sql is auto-generated, and hence there may be thousands of options.
-        // It takes a long time to apply this rule. So set a threshold for the max number.
-        int optionSize = inPredicate.getOptions().size();
-        if (optionSize > 200) {
-            return inPredicate;
-        }
         ImmutableSet.Builder<Expression> newOptionsBuilder = 
ImmutableSet.builderWithExpectedSize(inPredicate.arity());
         for (Expression option : inPredicate.getOptions()) {
             newOptionsBuilder.add(option);
         }
 
         Set<Expression> newOptions = newOptionsBuilder.build();
-        if (newOptions.size() == optionSize) {
+        if (newOptions.size() == inPredicate.getOptions().size()) {
             return inPredicate;
         }
         return new InPredicate(inPredicate.getCompareExpr(), newOptions);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstant.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstant.java
new file mode 100644
index 00000000000..7e21b923e72
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstant.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.expression.rules;
+
+import org.apache.doris.nereids.rules.expression.ExpressionPatternMatcher;
+import org.apache.doris.nereids.rules.expression.ExpressionPatternRuleFactory;
+import org.apache.doris.nereids.trees.expressions.EqualTo;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.InPredicate;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.util.Lists;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Extract the non-constant options of an InPredicate. For example:
+ * where k1 in (k2, k3, 10, 20, 30) ==> where k1 in (10, 20, 30) or k1 = k2 or k1 = k3.
+ * It's because the backend handles an in predicate containing non-constant columns with reduced performance.
+ */
+public class InPredicateExtractNonConstant implements 
ExpressionPatternRuleFactory {
+    public static final InPredicateExtractNonConstant INSTANCE = new 
InPredicateExtractNonConstant();
+
+    @Override
+    public List<ExpressionPatternMatcher<? extends Expression>> buildRules() {
+        return ImmutableList.of(
+                matchesType(InPredicate.class)
+                        .when(inPredicate -> inPredicate.getOptions().size()
+                                <= InPredicateDedup.REWRITE_OPTIONS_MAX_SIZE)
+                        .then(this::rewrite)
+        );
+    }
+
+    private Expression rewrite(InPredicate inPredicate) {
+        Set<Expression> nonConstants = 
Sets.newLinkedHashSetWithExpectedSize(inPredicate.arity());
+        for (Expression option : inPredicate.getOptions()) {
+            if (!option.isConstant()) {
+                nonConstants.add(option);
+            }
+        }
+        if (nonConstants.isEmpty()) {
+            return inPredicate;
+        }
+        Expression key = inPredicate.getCompareExpr();
+        List<Expression> disjunctions = 
Lists.newArrayListWithExpectedSize(inPredicate.getOptions().size());
+        List<Expression> constants = 
inPredicate.getOptions().stream().filter(Expression::isConstant)
+                .collect(Collectors.toList());
+        if (!constants.isEmpty()) {
+            disjunctions.add(ExpressionUtils.toInPredicateOrEqualTo(key, 
constants));
+        }
+        for (Expression option : nonConstants) {
+            disjunctions.add(new EqualTo(key, option));
+        }
+        return ExpressionUtils.or(disjunctions);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
index 84261a18fe2..5ebe8256312 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
@@ -146,7 +146,8 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
         @Override
         public ValueDesc visitInPredicate(InPredicate inPredicate, 
ExpressionRewriteContext context) {
             // only handle `NumericType` and `DateLikeType`
-            if (ExpressionUtils.isAllNonNullLiteral(inPredicate.getOptions())
+            if (inPredicate.getOptions().size() <= 
InPredicateDedup.REWRITE_OPTIONS_MAX_SIZE
+                    && 
ExpressionUtils.isAllNonNullLiteral(inPredicate.getOptions())
                     && 
(ExpressionUtils.matchNumericType(inPredicate.getOptions())
                     || 
ExpressionUtils.matchDateLikeType(inPredicate.getOptions()))) {
                 return ValueDesc.discrete(context, inPredicate);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
index c89a823a9f3..387bdd2a721 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.expression;
 import org.apache.doris.nereids.rules.expression.rules.DistinctPredicatesRule;
 import org.apache.doris.nereids.rules.expression.rules.ExtractCommonFactorRule;
 import org.apache.doris.nereids.rules.expression.rules.InPredicateDedup;
+import 
org.apache.doris.nereids.rules.expression.rules.InPredicateExtractNonConstant;
 import 
org.apache.doris.nereids.rules.expression.rules.InPredicateToEqualToRule;
 import 
org.apache.doris.nereids.rules.expression.rules.NormalizeBinaryPredicatesRule;
 import org.apache.doris.nereids.rules.expression.rules.SimplifyCastRule;
@@ -259,4 +260,18 @@ class ExpressionRewriteTest extends 
ExpressionRewriteTestHelper {
 
         assertRewrite("a and (b > 0 and b < 10)", "a and (b > 0 and b < 10)");
     }
+
+    @Test
+    public void testInPredicateExtractNonConstant() {
+        executor = new ExpressionRuleExecutor(ImmutableList.of(
+                bottomUp(
+                        InPredicateExtractNonConstant.INSTANCE
+                )
+        ));
+
+        assertRewriteAfterTypeCoercion("TA in (3, 2, 1)", "TA in (3, 2, 1)");
+        assertRewriteAfterTypeCoercion("TA in (TB, TC, TB)", "TA = TB or TA = 
TC");
+        assertRewriteAfterTypeCoercion("TA in (3, 2, 1, TB, TC, TB)", "TA in 
(3, 2, 1) or TA = TB or TA = TC");
+        assertRewriteAfterTypeCoercion("IA in (1 + 2, 2 + 3, 3 + TB)", "IA in 
(cast(1 + 2 as int), cast(2 + 3 as int)) or IA = cast(3 + TB as int)");
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstantTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstantTest.java
new file mode 100644
index 00000000000..a574f9c4881
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/InPredicateExtractNonConstantTest.java
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.expression.rules;
+
+import org.apache.doris.nereids.sqltest.SqlTestBase;
+import org.apache.doris.nereids.util.PlanChecker;
+
+import org.junit.jupiter.api.Test;
+
+class InPredicateExtractNonConstantTest extends SqlTestBase {
+    @Test
+    public void testExtractNonConstant() {
+        
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");
+        String sql = "select * from T1 where id in (score, score, score + 
100)";
+        PlanChecker.from(connectContext)
+                .analyze(sql)
+                .rewrite()
+                .matches(
+                        logicalFilter().when(f -> 
f.getPredicate().toSql().equals(
+                                "OR[(id = score),(id = (score + 100))]"
+                        )));
+
+        sql = "select * from T1 where id in (score,  score + 10, score + 
score, score, 10, 20, 30, 100 + 200)";
+        PlanChecker.from(connectContext)
+                .analyze(sql)
+                .rewrite()
+                .matches(
+                        logicalFilter().when(f -> 
f.getPredicate().toSql().equals(
+                                "OR[id IN (10, 20, 30, 300),(id = score),(id = 
(score + 10)),(id = (score + score))]"
+                )));
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to