This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 3aa1a87fd02 [fix](Nereids) string literal coercion of in predicate 
(#35358)
3aa1a87fd02 is described below

commit 3aa1a87fd02fce845417b861456bd4a3d2f826c3
Author: wangqt <[email protected]>
AuthorDate: Mon May 27 10:26:15 2024 +0800

    [fix](Nereids) string literal coercion of in predicate (#35358)
    
    pick from master #35200
    
    Description:
       The sql execute much slow when the literal value with string format in 
`in predicate`; and the real data is integral type。
    ```
    mysql> set enable_nereids_planner = false;
    Query OK, 0 rows affected (0.03 sec)
    
    mysql> select id,sum(clicks) from a_table where id in ('787934713', 
'306960695') group by id limit 10;
    +------------+---------------+
    | id | sum(`clicks`) |
    +------------+---------------+
    |  787934713 |          2838 |
    |  306960695 |           339 |
    +------------+---------------+
    2 rows in set (1.81 sec)
    
    mysql> set enable_nereids_planner = true;
    Query OK, 0 rows affected (0.02 sec)
    
    mysql> select id,sum(clicks) from a_table where id in ('787934713', 
'306960695') group by id limit 10;
    +------------+-------------+
    | id | sum(clicks) |
    +------------+-------------+
    |  787934713 |        2838 |
    |  306960695 |         339 |
    +------------+-------------+
    2 rows in set (28.14 sec)
    ```
    
    Reason:
    In legacy planner, the string literal with convert to integral value, but 
in the nereids planner do not do this convert and with do string matching in BE。
    
    Solved:
    do process string literal with numeric in `in predicate` like in 
`comparison predicate`;
    test table:
    ```
    create table a_table(
        k1 BIGINT NOT NULL,
        k2 VARCHAR(100) NOT NULL,
        v1 INT SUM NULL DEFAULT "0"
    ) ENGINE=OLAP
    AGGREGATE KEY(k1,k2)
    distributed BY hash(k1) buckets 2
    properties("replication_num" = "1");
    insert into a_table values (10, 'name1', 10),(20, 'name2', 10);
    explain plan select * from a_table where k1 in ('10', '20001');
    ```
    before optimize:
    ```
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    | Explain String(Nereids Planner)                                           
                                                           |
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    | ========== PARSED PLAN (time: 1ms) ==========                             
                                                           |
    | UnboundResultSink[4] (  )                                                 
                                                           |
    | +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )         
                                                           |
    |    +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )           
                                                           |
    |       +--LogicalCheckPolicy (  )                                          
                                                           |
    |          +--UnboundRelation ( id=RelationId#0, nameParts=a_table )        
                                                           |
    |                                                                           
                                                           |
    | ========== ANALYZED PLAN (time: 2ms) ==========                           
                                                           |
    | LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )                  
                                                           |
    | +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2], 
excepts=[] )                                                    |
    |    +--LogicalFilter[11] ( predicates=cast(k1#0 as TEXT) IN ('10001', 
'20001') )                                                      |
    |       +--LogicalOlapScan ( qualified=internal.db.a_table, 
indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET )      |
    |                                                                           
                                                           |
    | ========== REWRITTEN PLAN (time: 6ms) ==========                          
                                                           |
    | LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )                  
                                                           |
    | +--LogicalFilter[43] ( predicates=cast(k1#0 as TEXT) IN ('10001', 
'20001') )                                                         |
    |    +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table, 
selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
    |                                                                           
                                                           |
    | ========== OPTIMIZED PLAN (time: 6ms) ==========                          
                                                           |
    | PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )                 
                                                           |
    | +--PhysicalDistribute[87]@1 ( stats=0.33, 
distributionSpec=DistributionSpecGather )                                       
           |
    |    +--PhysicalFilter[84]@1 ( stats=0.33, predicates=cast(k1#0 as TEXT) IN 
('10001', '20001') )                                       |
    |       +--PhysicalOlapScan[a_table]@0 ( stats=1 )                          
                                                           |
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    ```
    after optimize:
    ```
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    | Explain String(Nereids Planner)                                           
                                                           |
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    | ========== PARSED PLAN (time: 15ms) ==========                            
                                                           |
    | UnboundResultSink[4] (  )                                                 
                                                           |
    | +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )         
                                                           |
    |    +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )           
                                                           |
    |       +--LogicalCheckPolicy (  )                                          
                                                           |
    |          +--UnboundRelation ( id=RelationId#0, nameParts=a_table )        
                                                           |
    |                                                                           
                                                           |
    | ========== ANALYZED PLAN (time: 11ms) ==========                          
                                                           |
    | LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )                  
                                                           |
    | +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2], 
excepts=[] )                                                    |
    |    +--LogicalFilter[11] ( predicates=k1#0 IN (10001, 20001) )             
                                                           |
    |       +--LogicalOlapScan ( qualified=internal.db.a_table, 
indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET )      |
    |                                                                           
                                                           |
    | ========== REWRITTEN PLAN (time: 12ms) ==========                         
                                                           |
    | LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )                  
                                                           |
    | +--LogicalFilter[43] ( predicates=k1#0 IN (10001, 20001) )                
                                                           |
    |    +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table, 
selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
    |                                                                           
                                                           |
    | ========== OPTIMIZED PLAN (time: 4ms) ==========                          
                                                           |
    | PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )                 
                                                           |
    | +--PhysicalDistribute[87]@1 ( stats=0, 
distributionSpec=DistributionSpecGather )                                       
              |
    |    +--PhysicalFilter[84]@1 ( stats=0, predicates=k1#0 IN (10001, 20001) ) 
                                                           |
    |       +--PhysicalOlapScan[a_table]@0 ( stats=2 )                          
                                                           |
    
+--------------------------------------------------------------------------------------------------------------------------------------+
    ```
---
 .../doris/nereids/util/TypeCoercionUtils.java      | 30 +++++++++++++++++++---
 .../doris/nereids/util/TypeCoercionUtilsTest.java  | 26 +++++++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index b79c4567038..24da439aa27 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -104,7 +104,9 @@ import org.apache.logging.log4j.Logger;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import java.util.ArrayList;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.Map;
 import java.util.Optional;
 import java.util.function.Supplier;
@@ -727,20 +729,40 @@ public class TypeCoercionUtils {
                 .allMatch(dt -> 
dt.equals(inPredicate.getCompareExpr().getDataType()))) {
             return inPredicate;
         }
+        // process string literal with numeric
+        boolean hitString = false;
+        List<Expression> newOptions = new 
ArrayList<>(inPredicate.getOptions());
+        if (!(inPredicate.getCompareExpr().getDataType().isStringLikeType())) {
+            ListIterator<Expression> iterator = newOptions.listIterator();
+            while (iterator.hasNext()) {
+                Expression origOption = iterator.next();
+                if (origOption instanceof Literal && ((Literal) 
origOption).isStringLikeLiteral()) {
+                    Optional<Expression> option = 
TypeCoercionUtils.characterLiteralTypeCoercion(
+                            ((Literal) origOption).getStringValue(), 
inPredicate.getCompareExpr().getDataType());
+                    if (option.isPresent()) {
+                        iterator.set(option.get());
+                        hitString = true;
+                    }
+                }
+            }
+        }
+        final InPredicate fmtInPredicate =
+                hitString ? new InPredicate(inPredicate.getCompareExpr(), 
newOptions) : inPredicate;
+
         Optional<DataType> optionalCommonType = 
TypeCoercionUtils.findWiderCommonTypeForComparison(
-                inPredicate.children()
+                fmtInPredicate.children()
                         .stream()
                         
.map(Expression::getDataType).collect(Collectors.toList()),
                 true);
 
         return optionalCommonType
                 .map(commonType -> {
-                    List<Expression> newChildren = 
inPredicate.children().stream()
+                    List<Expression> newChildren = 
fmtInPredicate.children().stream()
                             .map(e -> TypeCoercionUtils.castIfNotSameType(e, 
commonType))
                             .collect(Collectors.toList());
-                    return inPredicate.withChildren(newChildren);
+                    return fmtInPredicate.withChildren(newChildren);
                 })
-                .orElse(inPredicate);
+                .orElse(fmtInPredicate);
     }
 
     /**
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
index 2b82db80430..83ab44a5429 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
@@ -21,7 +21,9 @@ import org.apache.doris.nereids.trees.expressions.Add;
 import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.Divide;
 import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.InPredicate;
 import org.apache.doris.nereids.trees.expressions.Multiply;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.Subtract;
 import org.apache.doris.nereids.trees.expressions.literal.CharLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DecimalLiteral;
@@ -56,6 +58,7 @@ import org.apache.doris.nereids.types.TinyIntType;
 import org.apache.doris.nereids.types.VarcharType;
 import org.apache.doris.nereids.types.coercion.IntegralType;
 
+import com.google.common.collect.ImmutableList;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
@@ -734,4 +737,27 @@ public class TypeCoercionUtilsTest {
         Assertions.assertEquals(expression.child(0),
                 new Cast(multiply.child(0), 
DecimalV3Type.createDecimalV3Type(10, 3)));
     }
+
+    @Test
+    public void testProcessInStringCoercion() {
+        // BigInt slot vs String literal
+        InPredicate bigintString = new InPredicate(
+                new SlotReference("c1", BigIntType.INSTANCE),
+                ImmutableList.of(
+                        new VarcharLiteral("200"),
+                        new VarcharLiteral("922337203685477001")));
+        bigintString = (InPredicate) 
TypeCoercionUtils.processInPredicate(bigintString);
+        Assertions.assertEquals(BigIntType.INSTANCE, 
bigintString.getCompareExpr().getDataType());
+        Assertions.assertEquals(BigIntType.INSTANCE, 
bigintString.getOptions().get(0).getDataType());
+
+        // SmallInt slot vs String literal
+        InPredicate smallIntString = new InPredicate(
+                new SlotReference("c1", SmallIntType.INSTANCE),
+                ImmutableList.of(
+                        new DecimalLiteral(new BigDecimal("987654.321")),
+                        new VarcharLiteral("922337203685477001")));
+        smallIntString = (InPredicate) 
TypeCoercionUtils.processInPredicate(smallIntString);
+        Assertions.assertEquals(DecimalV3Type.createDecimalV3Type(23, 3), 
smallIntString.getCompareExpr().getDataType());
+        Assertions.assertEquals(DecimalV3Type.createDecimalV3Type(23, 3), 
smallIntString.getOptions().get(0).getDataType());
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to