This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a7f3c219834 branch-2.1: [opt](nereids) opt range inference for or 
expression when out of order #46303 (#53706)
a7f3c219834 is described below

commit a7f3c219834f86675a852ced39f602140e81d4a3
Author: seawinde <[email protected]>
AuthorDate: Fri Jul 25 16:49:06 2025 +0800

    branch-2.1: [opt](nereids) opt range inference for or expression when out 
of order #46303 (#53706)
    
    picked from part of #46303
---
 .../rules/expression/rules/SimplifyRange.java      | 44 +++++++++++++++++++---
 .../rules/expression/SimplifyRangeTest.java        | 40 +++++++++++++-------
 2 files changed, 65 insertions(+), 19 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
index d5fd8e24783..434f7a6f5bd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
@@ -46,7 +46,9 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Multimaps;
 import com.google.common.collect.Range;
+import com.google.common.collect.RangeSet;
 import com.google.common.collect.Sets;
+import com.google.common.collect.TreeRangeSet;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -157,18 +159,18 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
         @Override
         public ValueDesc visitAnd(And and, ExpressionRewriteContext context) {
             return simplify(context, and, 
ExpressionUtils.extractConjunction(and),
-                    ValueDesc::intersect, ExpressionUtils::and);
+                    ValueDesc::intersect, ExpressionUtils::and, true);
         }
 
         @Override
         public ValueDesc visitOr(Or or, ExpressionRewriteContext context) {
             return simplify(context, or, 
ExpressionUtils.extractDisjunction(or),
-                    ValueDesc::union, ExpressionUtils::or);
+                    ValueDesc::union, ExpressionUtils::or, false);
         }
 
         private ValueDesc simplify(ExpressionRewriteContext context,
                 Expression originExpr, List<Expression> predicates,
-                BinaryOperator<ValueDesc> op, BinaryOperator<Expression> 
exprOp) {
+                BinaryOperator<ValueDesc> op, BinaryOperator<Expression> 
exprOp, boolean isAnd) {
 
             Multimap<Expression, ValueDesc> groupByReference
                     = Multimaps.newListMultimap(new LinkedHashMap<>(), 
ArrayList::new);
@@ -181,7 +183,9 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
             List<ValueDesc> valuePerRefs = Lists.newArrayList();
             for (Entry<Expression, Collection<ValueDesc>> referenceValues : 
groupByReference.asMap().entrySet()) {
                 List<ValueDesc> valuePerReference = (List) 
referenceValues.getValue();
-
+                if (!isAnd) {
+                    valuePerReference = unionDiscreteAndRange(context, 
referenceValues.getKey(), valuePerReference);
+                }
                 // merge per reference
                 ValueDesc simplifiedValue = valuePerReference.get(0);
                 for (int i = 1; i < valuePerReference.size(); i++) {
@@ -200,6 +204,30 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
         }
     }
 
+    /** Merge the ranges of all RangeValue descs via a RangeSet; every other value desc is kept unchanged. */
+    public static List<ValueDesc> 
unionDiscreteAndRange(ExpressionRewriteContext context,
+            Expression reference, List<ValueDesc> valueDescs) {
+        List<ValueDesc> result = 
Lists.newArrayListWithExpectedSize(valueDescs.size());
+
+        // e.g. (a >= 8 and a < 9) or (a >= 12 and a < 13) or (a >= 13 and a < 14) can be converted to
+        // (a >= 8 and a < 9) or (a >= 12 and a < 14)
+        RangeSet<Literal> rangeSet = TreeRangeSet.create();
+        for (ValueDesc valueDesc : valueDescs) {
+            if (valueDesc instanceof RangeValue) {
+                Range<Literal> range = ((RangeValue) valueDesc).range;
+                rangeSet.add(range);
+            } else {
+                result.add(valueDesc);
+            }
+        }
+        for (Range<Literal> range : rangeSet.asRanges()) {
+            RangeValue rangeValue = new RangeValue(context, reference, 
RangeValue.toExpression(range, reference));
+            rangeValue.range = range;
+            result.add(rangeValue);
+        }
+        return result;
+    }
+
     private abstract static class ValueDesc {
         ExpressionRewriteContext context;
         Expression toExpr;
@@ -357,7 +385,9 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
                 if (range.isConnected(o.range)) {
                     RangeValue rangeValue = new RangeValue(context, reference, 
originExpr);
                     rangeValue.range = range.intersection(o.range);
-                    return rangeValue;
+                    if (!rangeValue.range.isEmpty()) {
+                        return rangeValue;
+                    }
                 }
                 return new EmptyValue(context, reference, originExpr);
             }
@@ -372,6 +402,10 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
 
         @Override
         public Expression toExpression() {
+            return toExpression(this.range, this.reference);
+        }
+
+        public static Expression toExpression(Range<Literal> range, Expression 
reference) {
             List<Expression> result = Lists.newArrayList();
             if (range.hasLowerBound()) {
                 if (range.lowerBoundType() == BoundType.CLOSED) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
index 79906880f53..ca2cab9905b 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
@@ -65,6 +65,8 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         executor = new ExpressionRuleExecutor(ImmutableList.of(
             bottomUp(SimplifyRange.INSTANCE)
         ));
+        assertRewrite("(TA >= 8 and TA < 8) or (TA >= 8 and TA < 8)", "TA is 
null and null");
+        assertRewrite("(TA >=12 and TA < 13) or (TA >= 15 and TA < 16) or (TA 
>= 16 and TA < 17)", "(TA >=12 and TA < 13) or (TA >=15 and TA < 17)");
         assertRewrite("TA", "TA");
         assertRewrite("TA > 3 or TA > null", "TA > 3 OR NULL");
         assertRewrite("TA > 3 or TA < null", "TA > 3 OR NULL");
@@ -85,16 +87,16 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("(TA > 3 and TA < 1) or (TA > 7 and TA < 5)", "TA is 
null and null");
         assertRewriteNotNull("TA > 3 and TA < 1", "FALSE");
         assertRewrite("TA > 3 and TA < 1", "TA is null and null");
-        assertRewrite("TA >= 3 and TA < 3", "TA >= 3 and TA < 3");
+        assertRewrite("TA >= 3 and TA < 3", "TA is null and null");
         assertRewriteNotNull("TA = 1 and TA > 10", "FALSE");
         assertRewrite("TA = 1 and TA > 10", "TA is null and null");
-        assertRewrite("TA > 5 or TA < 1", "TA > 5 or TA < 1");
+        assertRewrite("TA > 5 or TA < 1", "TA < 1 or TA > 5");
         assertRewrite("TA > 5 or TA > 1 or TA > 10", "TA > 1");
         assertRewrite("TA > 5 or TA > 1 or TA < 10", "TA is not null or null");
         assertRewriteNotNull("TA > 5 or TA > 1 or TA < 10", "TRUE");
         assertRewrite("TA > 5 and TA > 1 and TA > 10", "TA > 10");
         assertRewrite("TA > 5 and TA > 1 and TA < 10", "TA > 5 and TA < 10");
-        assertRewrite("TA > 1 or TA < 1", "TA > 1 or TA < 1");
+        assertRewrite("TA > 1 or TA < 1", "TA < 1 or TA > 1");
         assertRewrite("TA > 1 or TA < 10", "TA is not null or null");
         assertRewriteNotNull("TA > 1 or TA < 10", "TRUE");
         assertRewrite("TA > 5 and TA < 10", "TA > 5 and TA < 10");
@@ -109,7 +111,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("(TA > 10 or TA > 20) and (TB > 10 and TB > 20)", "TA > 
10 and TB > 20");
         assertRewrite("((TB > 30 and TA > 40) and TA > 20) and (TB > 10 and TB 
> 20)", "TB > 30 and TA > 40");
         assertRewrite("(TA > 10 and TB > 10) or (TB > 10 and TB > 20)", "TA > 
10 and TB > 10 or TB > 20");
-        assertRewrite("((TA > 10 or TA > 5) and TB > 10) or (TB > 10 and (TB > 
20 or TB < 10))", "(TA > 5 and TB > 10) or (TB > 10 and (TB > 20 or TB < 10))");
+        assertRewrite("((TA > 10 or TA > 5) and TB > 10) or (TB > 10 and (TB > 
20 or TB < 10))", "(TA > 5 and TB > 10) or (TB > 10 and (TB < 10 or TB > 20))");
         assertRewriteNotNull("TA in (1,2,3) and TA > 10", "FALSE");
         assertRewrite("TA in (1,2,3) and TA > 10", "TA is null and null");
         assertRewrite("TA in (1,2,3) and TA >= 1", "TA in (1,2,3)");
@@ -147,15 +149,15 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("(TA + TC > 3 and TA + TC < 1) or (TA + TC > 7 and TA + 
TC < 5)", "(TA + TC) is null and null");
         assertRewriteNotNull("TA + TC > 3 and TA + TC < 1", "FALSE");
         assertRewrite("TA + TC > 3 and TA + TC < 1", "(TA + TC) is null and 
null");
-        assertRewrite("TA + TC >= 3 and TA + TC < 3", "TA + TC >= 3 and TA + 
TC < 3");
+        assertRewrite("TA + TC >= 3 and TA + TC < 3", "TA + TC is null and 
null");
         assertRewriteNotNull("TA + TC = 1 and TA + TC > 10", "FALSE");
         assertRewrite("TA + TC = 1 and TA + TC > 10", "(TA + TC) is null and 
null");
-        assertRewrite("TA + TC > 5 or TA + TC < 1", "TA + TC > 5 or TA + TC < 
1");
+        assertRewrite("TA + TC > 5 or TA + TC < 1", "TA + TC < 1 or TA + TC > 
5");
         assertRewrite("TA + TC > 5 or TA + TC > 1 or TA + TC > 10", "TA + TC > 
1");
         assertRewrite("TA + TC > 5 or TA + TC > 1 or TA + TC < 10", "(TA + TC) 
is not null or null");
         assertRewrite("TA + TC > 5 and TA + TC > 1 and TA + TC > 10", "TA + TC 
> 10");
         assertRewrite("TA + TC > 5 and TA + TC > 1 and TA + TC < 10", "TA + TC 
> 5 and TA + TC < 10");
-        assertRewrite("TA + TC > 1 or TA + TC < 1", "TA + TC > 1 or TA + TC < 
1");
+        assertRewrite("TA + TC > 1 or TA + TC < 1", "TA + TC < 1 or TA + TC > 
1");
         assertRewrite("TA + TC > 1 or TA + TC < 10", "(TA + TC) is not null or 
null");
         assertRewrite("TA + TC > 5 and TA + TC < 10", "TA + TC > 5 and TA + TC 
< 10");
         assertRewrite("TA + TC > 5 and TA + TC > 10", "TA + TC > 10");
@@ -168,7 +170,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("(TA + TC > 10 or TA + TC > 20) and (TB > 10 and TB > 
20)", "TA + TC > 10 and TB > 20");
         assertRewrite("((TB > 30 and TA + TC > 40) and TA + TC > 20) and (TB > 
10 and TB > 20)", "TB > 30 and TA + TC > 40");
         assertRewrite("(TA + TC > 10 and TB > 10) or (TB > 10 and TB > 20)", 
"TA + TC > 10 and TB > 10 or TB > 20");
-        assertRewrite("((TA + TC > 10 or TA + TC > 5) and TB > 10) or (TB > 10 
and (TB > 20 or TB < 10))", "(TA + TC > 5 and TB > 10) or (TB > 10 and (TB > 20 
or TB < 10))");
+        assertRewrite("((TA + TC > 10 or TA + TC > 5) and TB > 10) or (TB > 10 
and (TB > 20 or TB < 10))", "(TA + TC > 5 and TB > 10) or (TB > 10 and (TB < 10 
or TB > 20))");
         assertRewriteNotNull("TA + TC in (1,2,3) and TA + TC > 10", "FALSE");
         assertRewrite("TA + TC in (1,2,3) and TA + TC > 10", "(TA + TC) is 
null and null");
         assertRewrite("TA + TC in (1,2,3) and TA + TC >= 1", "TA + TC in 
(1,2,3)");
@@ -204,6 +206,9 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         executor = new ExpressionRuleExecutor(ImmutableList.of(
             bottomUp(SimplifyRange.INSTANCE)
         ));
+        assertRewrite(
+                "(AA >= date '2024-01-01' and AA < date '2024-01-02') or (AA 
>= date '2024-01-05' and AA < date '2024-01-06') or (AA >= date '2024-01-06' 
and AA < date '2024-01-07')",
+                "(AA >= date '2024-01-01' and AA < date '2024-01-02') or (AA 
>= date '2024-01-05' and AA < date '2024-01-07')");
         assertRewrite("AA", "AA");
         assertRewrite(
                 "(AA >= date '2024-01-01' and AA <= date '2024-01-03') or (AA 
> date '2024-01-05' and AA < date '2024-01-07')",
@@ -217,11 +222,13 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewriteNotNull("AA > date '2024-01-03' and AA < date 
'2024-01-01'", "FALSE");
         assertRewrite("AA > date '2024-01-03' and AA < date '2024-01-01'", "AA 
is null and null");
         assertRewrite("AA >= date '2024-01-01' and AA < date '2024-01-01'",
-                "AA >= date '2024-01-01' and AA < date '2024-01-01'");
+                "AA is null and null");
+        assertRewrite("(AA >= date '2024-01-01' and AA < date '2024-01-01') or 
(AA >= date '2024-01-01' and AA < date '2024-01-01')",
+                "AA is null and null");
         assertRewriteNotNull("AA = date '2024-01-01' and AA > date 
'2024-01-10'", "FALSE");
         assertRewrite("AA = date '2024-01-01' and AA > date '2024-01-10'", "AA 
is null and null");
         assertRewrite("AA > date '2024-01-05' or AA < date '2024-01-01'",
-                "AA > date '2024-01-05' or AA < date '2024-01-01'");
+                "AA < date '2024-01-01' or AA > date '2024-01-05'");
         assertRewrite("AA > date '2024-01-05' or AA > date '2024-01-01' or AA 
> date '2024-01-10'",
                 "AA > date '2024-01-01'");
         assertRewrite("AA > date '2024-01-05' or AA > date '2024-01-01' or AA 
< date '2024-01-10'", "AA is not null or null");
@@ -231,7 +238,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("AA > date '2024-01-05' and AA > date '2024-01-01' and 
AA < date '2024-01-10'",
                 "AA > date '2024-01-05' and AA < date '2024-01-10'");
         assertRewrite("AA > date '2024-01-05' or AA < date '2024-01-05'",
-                "AA > date '2024-01-05' or AA < date '2024-01-05'");
+                "AA < date '2024-01-05' or AA > date '2024-01-05'");
         assertRewrite("AA > date '2024-01-01' or AA < date '2024-01-10'", "AA 
is not null or null");
         assertRewriteNotNull("AA > date '2024-01-01' or AA < date 
'2024-01-10'", "TRUE");
         assertRewrite("AA > date '2024-01-05' and AA < date '2024-01-10'",
@@ -285,6 +292,9 @@ public class SimplifyRangeTest extends ExpressionRewrite {
             bottomUp(SimplifyRange.INSTANCE)
         ));
         assertRewrite("CA", "CA");
+        assertRewrite(
+                "(CA >= timestamp '2024-01-01 00:00:00' and CA < timestamp 
'2024-01-02 00:00:00') or (CA >= timestamp '2024-01-05 00:00:00' and CA < 
timestamp '2024-01-07 00:00:00') or (CA >= timestamp '2024-01-07 00:00:00' and 
CA < timestamp '2024-01-08 00:00:00')",
+                "(CA >= timestamp '2024-01-01 00:00:00' and CA < timestamp 
'2024-01-02 00:00:00') or (CA >= timestamp '2024-01-05 00:00:00' and CA < 
timestamp '2024-01-08 00:00:00')");
         assertRewrite(
                 "(CA >= timestamp '2024-01-01 00:00:00' and CA <= timestamp 
'2024-01-03 00:00:00') or (CA > timestamp '2024-01-05 00:00:00' and CA < 
timestamp '2024-01-07 00:00:00')",
                 "(CA >= timestamp '2024-01-01 00:00:00' and CA <= timestamp 
'2024-01-03 00:00:00') or (CA > timestamp '2024-01-05 00:00:00' and CA < 
timestamp '2024-01-07 00:00:00')");
@@ -297,11 +307,13 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewriteNotNull("CA > timestamp '2024-01-03 00:00:10' and CA < 
timestamp '2024-01-01 01:00:00'", "FALSE");
         assertRewrite("CA > timestamp '2024-01-03 00:00:10' and CA < timestamp 
'2024-01-01 01:00:00'", "CA is null and null");
         assertRewrite("CA >= timestamp '2024-01-01 00:00:10' and CA < 
timestamp '2024-01-01 00:00:10'",
-                "CA >= timestamp '2024-01-01 00:00:10' and CA < timestamp 
'2024-01-01 00:00:10'");
+                "CA is null and null");
+        assertRewrite("(CA >= timestamp '2024-01-01 00:00:10' and CA < 
timestamp '2024-01-01 00:00:10') or (CA >= timestamp '2024-01-01 00:00:10' and 
CA < timestamp '2024-01-01 00:00:10')",
+                "CA is null and null");
         assertRewriteNotNull("CA = timestamp '2024-01-01 10:00:10' and CA > 
timestamp '2024-01-10 00:00:10'", "FALSE");
         assertRewrite("CA = timestamp '2024-01-01 10:00:10' and CA > timestamp 
'2024-01-10 00:00:10'", "CA is null and null");
         assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA < timestamp 
'2024-01-01 00:00:10'",
-                "CA > timestamp '2024-01-05 00:00:10' or CA < timestamp 
'2024-01-01 00:00:10'");
+                "CA < timestamp '2024-01-01 00:00:10' or CA > timestamp 
'2024-01-05 00:00:10'");
         assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA > timestamp 
'2024-01-01 00:00:10' or CA > timestamp '2024-01-10 00:00:10'",
                 "CA > timestamp '2024-01-01 00:00:10'");
         assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA > timestamp 
'2024-01-01 00:00:10' or CA < timestamp '2024-01-10 00:00:10'", "CA is not null 
or null");
@@ -311,7 +323,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
         assertRewrite("CA > timestamp '2024-01-05 00:00:10' and CA > timestamp 
'2024-01-01 00:00:10' and CA < timestamp '2024-01-10 00:00:10'",
                 "CA > timestamp '2024-01-05 00:00:10' and CA < timestamp 
'2024-01-10 00:00:10'");
         assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA < timestamp 
'2024-01-05 00:00:10'",
-                "CA > timestamp '2024-01-05 00:00:10' or CA < timestamp 
'2024-01-05 00:00:10'");
+                "CA < timestamp '2024-01-05 00:00:10' or CA > timestamp 
'2024-01-05 00:00:10'");
         assertRewrite("CA > timestamp '2024-01-01 00:02:10' or CA < timestamp 
'2024-01-10 00:02:10'", "CA is not null or null");
         assertRewriteNotNull("CA > timestamp '2024-01-01 00:00:00' or CA < 
timestamp '2024-01-10 00:00:00'", "TRUE");
         assertRewrite("CA > timestamp '2024-01-05 01:00:00' and CA < timestamp 
'2024-01-10 01:00:00'",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to