This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 3aa1a87fd02 [fix](Nereids) string literal coercion of in predicate
(#35358)
3aa1a87fd02 is described below
commit 3aa1a87fd02fce845417b861456bd4a3d2f826c3
Author: wangqt <[email protected]>
AuthorDate: Mon May 27 10:26:15 2024 +0800
[fix](Nereids) string literal coercion of in predicate (#35358)
pick from master #35200
Description:
The sql execute much slow when the literal value with string format in
`in predicate`; and the real data is integral type。
```
mysql> set enable_nereids_planner = false;
Query OK, 0 rows affected (0.03 sec)
mysql> select id,sum(clicks) from a_table where id in ('787934713',
'306960695') group by id limit 10;
+------------+---------------+
| id | sum(`clicks`) |
+------------+---------------+
| 787934713 | 2838 |
| 306960695 | 339 |
+------------+---------------+
2 rows in set (1.81 sec)
mysql> set enable_nereids_planner = true;
Query OK, 0 rows affected (0.02 sec)
mysql> select id,sum(clicks) from a_table where id in ('787934713',
'306960695') group by id limit 10;
+------------+-------------+
| id | sum(clicks) |
+------------+-------------+
| 787934713 | 2838 |
| 306960695 | 339 |
+------------+-------------+
2 rows in set (28.14 sec)
```
Reason:
In legacy planner, the string literal with convert to integral value, but
in the nereids planner do not do this convert and with do string matching in BE。
Solved:
do process string literal with numeric in `in predicate` like in
`comparison predicate`;
test table:
```
create table a_table(
k1 BIGINT NOT NULL,
k2 VARCHAR(100) NOT NULL,
v1 INT SUM NULL DEFAULT "0"
) ENGINE=OLAP
AGGREGATE KEY(k1,k2)
distributed BY hash(k1) buckets 2
properties("replication_num" = "1");
insert into a_table values (10, 'name1', 10),(20, 'name2', 10);
explain plan select * from a_table where k1 in ('10', '20001');
```
before optimize:
```
+--------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner)
|
+--------------------------------------------------------------------------------------------------------------------------------------+
| ========== PARSED PLAN (time: 1ms) ==========
|
| UnboundResultSink[4] ( )
|
| +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )
|
| +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )
|
| +--LogicalCheckPolicy ( )
|
| +--UnboundRelation ( id=RelationId#0, nameParts=a_table )
|
|
|
| ========== ANALYZED PLAN (time: 2ms) ==========
|
| LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2],
excepts=[] ) |
| +--LogicalFilter[11] ( predicates=cast(k1#0 as TEXT) IN ('10001',
'20001') ) |
| +--LogicalOlapScan ( qualified=internal.db.a_table,
indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET ) |
|
|
| ========== REWRITTEN PLAN (time: 6ms) ==========
|
| LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--LogicalFilter[43] ( predicates=cast(k1#0 as TEXT) IN ('10001',
'20001') ) |
| +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table,
selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
|
|
| ========== OPTIMIZED PLAN (time: 6ms) ==========
|
| PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--PhysicalDistribute[87]@1 ( stats=0.33,
distributionSpec=DistributionSpecGather )
|
| +--PhysicalFilter[84]@1 ( stats=0.33, predicates=cast(k1#0 as TEXT) IN
('10001', '20001') ) |
| +--PhysicalOlapScan[a_table]@0 ( stats=1 )
|
+--------------------------------------------------------------------------------------------------------------------------------------+
```
after optimize:
```
+--------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner)
|
+--------------------------------------------------------------------------------------------------------------------------------------+
| ========== PARSED PLAN (time: 15ms) ==========
|
| UnboundResultSink[4] ( )
|
| +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )
|
| +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )
|
| +--LogicalCheckPolicy ( )
|
| +--UnboundRelation ( id=RelationId#0, nameParts=a_table )
|
|
|
| ========== ANALYZED PLAN (time: 11ms) ==========
|
| LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2],
excepts=[] ) |
| +--LogicalFilter[11] ( predicates=k1#0 IN (10001, 20001) )
|
| +--LogicalOlapScan ( qualified=internal.db.a_table,
indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET ) |
|
|
| ========== REWRITTEN PLAN (time: 12ms) ==========
|
| LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--LogicalFilter[43] ( predicates=k1#0 IN (10001, 20001) )
|
| +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table,
selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
|
|
| ========== OPTIMIZED PLAN (time: 4ms) ==========
|
| PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )
|
| +--PhysicalDistribute[87]@1 ( stats=0,
distributionSpec=DistributionSpecGather )
|
| +--PhysicalFilter[84]@1 ( stats=0, predicates=k1#0 IN (10001, 20001) )
|
| +--PhysicalOlapScan[a_table]@0 ( stats=2 )
|
+--------------------------------------------------------------------------------------------------------------------------------------+
```
---
.../doris/nereids/util/TypeCoercionUtils.java | 30 +++++++++++++++++++---
.../doris/nereids/util/TypeCoercionUtilsTest.java | 26 +++++++++++++++++++
2 files changed, 52 insertions(+), 4 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index b79c4567038..24da439aa27 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -104,7 +104,9 @@ import org.apache.logging.log4j.Logger;
import java.math.BigDecimal;
import java.math.BigInteger;
+import java.util.ArrayList;
import java.util.List;
+import java.util.ListIterator;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
@@ -727,20 +729,40 @@ public class TypeCoercionUtils {
.allMatch(dt ->
dt.equals(inPredicate.getCompareExpr().getDataType()))) {
return inPredicate;
}
+ // process string literal with numeric
+ boolean hitString = false;
+ List<Expression> newOptions = new
ArrayList<>(inPredicate.getOptions());
+ if (!(inPredicate.getCompareExpr().getDataType().isStringLikeType())) {
+ ListIterator<Expression> iterator = newOptions.listIterator();
+ while (iterator.hasNext()) {
+ Expression origOption = iterator.next();
+ if (origOption instanceof Literal && ((Literal)
origOption).isStringLikeLiteral()) {
+ Optional<Expression> option =
TypeCoercionUtils.characterLiteralTypeCoercion(
+ ((Literal) origOption).getStringValue(),
inPredicate.getCompareExpr().getDataType());
+ if (option.isPresent()) {
+ iterator.set(option.get());
+ hitString = true;
+ }
+ }
+ }
+ }
+ final InPredicate fmtInPredicate =
+ hitString ? new InPredicate(inPredicate.getCompareExpr(),
newOptions) : inPredicate;
+
Optional<DataType> optionalCommonType =
TypeCoercionUtils.findWiderCommonTypeForComparison(
- inPredicate.children()
+ fmtInPredicate.children()
.stream()
.map(Expression::getDataType).collect(Collectors.toList()),
true);
return optionalCommonType
.map(commonType -> {
- List<Expression> newChildren =
inPredicate.children().stream()
+ List<Expression> newChildren =
fmtInPredicate.children().stream()
.map(e -> TypeCoercionUtils.castIfNotSameType(e,
commonType))
.collect(Collectors.toList());
- return inPredicate.withChildren(newChildren);
+ return fmtInPredicate.withChildren(newChildren);
})
- .orElse(inPredicate);
+ .orElse(fmtInPredicate);
}
/**
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
index 2b82db80430..83ab44a5429 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java
@@ -21,7 +21,9 @@ import org.apache.doris.nereids.trees.expressions.Add;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.Divide;
import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.Multiply;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.Subtract;
import org.apache.doris.nereids.trees.expressions.literal.CharLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DecimalLiteral;
@@ -56,6 +58,7 @@ import org.apache.doris.nereids.types.TinyIntType;
import org.apache.doris.nereids.types.VarcharType;
import org.apache.doris.nereids.types.coercion.IntegralType;
+import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -734,4 +737,27 @@ public class TypeCoercionUtilsTest {
Assertions.assertEquals(expression.child(0),
new Cast(multiply.child(0),
DecimalV3Type.createDecimalV3Type(10, 3)));
}
+
+ @Test
+ public void testProcessInStringCoercion() {
+ // BigInt slot vs String literal
+ InPredicate bigintString = new InPredicate(
+ new SlotReference("c1", BigIntType.INSTANCE),
+ ImmutableList.of(
+ new VarcharLiteral("200"),
+ new VarcharLiteral("922337203685477001")));
+ bigintString = (InPredicate)
TypeCoercionUtils.processInPredicate(bigintString);
+ Assertions.assertEquals(BigIntType.INSTANCE,
bigintString.getCompareExpr().getDataType());
+ Assertions.assertEquals(BigIntType.INSTANCE,
bigintString.getOptions().get(0).getDataType());
+
+ // SmallInt slot vs String literal
+ InPredicate smallIntString = new InPredicate(
+ new SlotReference("c1", SmallIntType.INSTANCE),
+ ImmutableList.of(
+ new DecimalLiteral(new BigDecimal("987654.321")),
+ new VarcharLiteral("922337203685477001")));
+ smallIntString = (InPredicate)
TypeCoercionUtils.processInPredicate(smallIntString);
+ Assertions.assertEquals(DecimalV3Type.createDecimalV3Type(23, 3),
smallIntString.getCompareExpr().getDataType());
+ Assertions.assertEquals(DecimalV3Type.createDecimalV3Type(23, 3),
smallIntString.getOptions().get(0).getDataType());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]