This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a3c33ebed20 [improvement](statistics) Force to use zonemap for
collecting string type min max. (#29631)
a3c33ebed20 is described below
commit a3c33ebed20170b1eaa9894541c35ed1f60c8c43
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Jan 8 13:49:37 2024 +0800
[improvement](statistics) Force to use zonemap for collecting string type
min max. (#29631)
Force to use zonemap for collecting string type min max.
String type does not use zonemap for min max, because the zonemap value at the BE
side is truncated at 512 bytes, which may make the value inaccurate. But that is
OK for statistics min max, and it also avoids scanning the whole table while
sampling.
---
.../rules/implementation/AggregateStrategies.java | 9 ++++--
.../java/org/apache/doris/qe/SessionVariable.java | 11 ++++++++
.../doris/statistics/util/StatisticsUtil.java | 1 +
.../suites/statistics/analyze_stats.groovy | 32 +++++++++++++++++-----
4 files changed, 44 insertions(+), 9 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index b4fce67beb1..a0eb011ba92 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -610,10 +610,10 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
if (mergeOp == PushDownAggOp.MIN_MAX || mergeOp ==
PushDownAggOp.MIX) {
PrimitiveType colType = column.getType().getPrimitiveType();
if (colType.isComplexType() || colType.isHllType() ||
colType.isBitmapType()
- || colType == PrimitiveType.STRING) {
+ || (colType == PrimitiveType.STRING &&
!enablePushDownStringMinMax())) {
return canNotPush;
}
- if (colType.isCharFamily() && column.getType().getLength() >
512) {
+ if (colType.isCharFamily() && column.getType().getLength() >
512 && !enablePushDownStringMinMax()) {
return canNotPush;
}
}
@@ -665,6 +665,11 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
}
}
+ private boolean enablePushDownStringMinMax() {
+ ConnectContext connectContext = ConnectContext.get();
+ return connectContext != null &&
connectContext.getSessionVariable().isEnablePushDownStringMinMax();
+ }
+
/**
* sql: select count(*) from tbl group by id
* <p>
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 6f078746d4f..be6e13e399d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -482,6 +482,8 @@ public class SessionVariable implements Serializable,
Writable {
public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE =
"enable_pushdown_minmax_on_unique";
+ public static final String ENABLE_PUSHDOWN_STRING_MINMAX =
"enable_pushdown_string_minmax";
+
// When set use fix replica = true, the fixed replica maybe bad, try to
use the health one if
// this session variable is set to true.
public static final String FALLBACK_OTHER_REPLICA_WHEN_FIXED_CORRUPT =
"fallback_other_replica_when_fixed_corrupt";
@@ -1229,6 +1231,11 @@ public class SessionVariable implements Serializable,
Writable {
"是否启用pushdown minmax on unique table。", "Set whether to pushdown
minmax on unique table."})
public boolean enablePushDownMinMaxOnUnique = false;
+ // Whether enable push down string type minmax to scan node.
+ @VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_STRING_MINMAX, needForward =
true, description = {
+ "是否启用string类型min max下推。", "Set whether to enable push down string type
minmax."})
+ public boolean enablePushDownStringMinMax = false;
+
// Whether drop table when create table as select insert data appear error.
@VariableMgr.VarAttr(name = DROP_TABLE_IF_CTAS_FAILED, needForward = true)
public boolean dropTableIfCtasFailed = true;
@@ -2474,6 +2481,10 @@ public class SessionVariable implements Serializable,
Writable {
this.enablePushDownMinMaxOnUnique = enablePushDownMinMaxOnUnique;
}
+ public boolean isEnablePushDownStringMinMax() {
+ return enablePushDownStringMinMax;
+ }
+
/**
* Nereids only support vectorized engine.
*
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index d6c171161fc..5c8aec3fbf6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -193,6 +193,7 @@ public class StatisticsUtil {
sessionVariable.enableFileCache = false;
sessionVariable.forbidUnknownColStats = false;
sessionVariable.enablePushDownMinMaxOnUnique = true;
+ sessionVariable.enablePushDownStringMinMax = true;
connectContext.setEnv(Env.getCurrentEnv());
connectContext.setDatabase(FeConstants.INTERNAL_DB_NAME);
connectContext.setQualifiedUser(UserIdentity.ROOT.getQualifiedUser());
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index 67c5705b62f..1559b5350b2 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -121,8 +121,6 @@ suite("test_analyze") {
SET forbid_unknown_col_stats=true;
"""
- Thread.sleep(1000 * 60)
-
sql """
SELECT * FROM ${tbl};
"""
@@ -2626,15 +2624,35 @@ PARTITION `p599` VALUES IN (599)
);
"""
sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
- Thread.sleep(1000 * 90)
+ Thread.sleep(1000 * 60)
sql """analyze table agg_table_test with sample rows 100 with sync"""
def agg_result = sql """show column stats agg_table_test (name)"""
assertEquals(agg_result[0][6], "N/A")
assertEquals(agg_result[0][7], "N/A")
- agg_result = sql """show column stats agg_table_test (id)"""
- assertEquals(agg_result[0][6], "1")
- assertEquals(agg_result[0][7], "2")
- sql """DROP DATABASE IF EXISTS AggTableTest"""
+
+ // Test sample string type min max
+ sql """
+ CREATE TABLE `string_min_max` (
+ `id` BIGINT NOT NULL,
+ `name` string NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 32
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """insert into string_min_max values (1,'name1'), (2, 'name2')"""
+ explain {
+ sql("select min(name), max(name) from string_min_max")
+ contains "pushAggOp=NONE"
+ }
+ sql """set enable_pushdown_string_minmax = true"""
+ explain {
+ sql("select min(name), max(name) from string_min_max")
+ contains "pushAggOp=MINMAX"
+ }
// Test trigger type.
sql """DROP DATABASE IF EXISTS trigger"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]