This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 66e162c750c [improvement](statistics) Force to use zonemap for 
collecting string type min max. (#29631) (#29662)
66e162c750c is described below

commit 66e162c750c6743a5e4bdd9380533f73b2a6a489
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Jan 8 16:03:10 2024 +0800

    [improvement](statistics) Force to use zonemap for collecting string type 
min max. (#29631) (#29662)
    
    Force to use zonemap for collecting string type min max.
    String type does not use zonemap for min max, because the zonemap value at 
BE side is truncated at 512 bytes, which may make the value inaccurate. But 
that is OK for statistics min max, and it also avoids scanning the whole table 
while sampling.
---
 .../rules/implementation/AggregateStrategies.java  |  9 ++++--
 .../java/org/apache/doris/qe/SessionVariable.java  | 11 ++++++++
 .../doris/statistics/util/StatisticsUtil.java      |  1 +
 .../suites/statistics/analyze_stats.groovy         | 32 +++++++++++++++++-----
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index b4fce67beb1..a0eb011ba92 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -610,10 +610,10 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
             if (mergeOp == PushDownAggOp.MIN_MAX || mergeOp == 
PushDownAggOp.MIX) {
                 PrimitiveType colType = column.getType().getPrimitiveType();
                 if (colType.isComplexType() || colType.isHllType() || 
colType.isBitmapType()
-                         || colType == PrimitiveType.STRING) {
+                         || (colType == PrimitiveType.STRING && 
!enablePushDownStringMinMax())) {
                     return canNotPush;
                 }
-                if (colType.isCharFamily() && column.getType().getLength() > 
512) {
+                if (colType.isCharFamily() && column.getType().getLength() > 
512 && !enablePushDownStringMinMax()) {
                     return canNotPush;
                 }
             }
@@ -665,6 +665,11 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
         }
     }
 
+    private boolean enablePushDownStringMinMax() {
+        ConnectContext connectContext = ConnectContext.get();
+        return connectContext != null && 
connectContext.getSessionVariable().isEnablePushDownStringMinMax();
+    }
+
     /**
      * sql: select count(*) from tbl group by id
      * <p>
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 7781b3369f1..4ce6470b42e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -438,6 +438,8 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = 
"enable_pushdown_minmax_on_unique";
 
+    public static final String ENABLE_PUSHDOWN_STRING_MINMAX = 
"enable_pushdown_string_minmax";
+
     public static final List<String> DEBUG_VARIABLES = ImmutableList.of(
             SKIP_DELETE_PREDICATE,
             SKIP_DELETE_BITMAP,
@@ -1070,6 +1072,11 @@ public class SessionVariable implements Serializable, 
Writable {
         "是否启用pushdown minmax on unique table。", "Set whether to pushdown 
minmax on unique table."})
     public boolean enablePushDownMinMaxOnUnique = false;
 
+    // Whether enable push down string type minmax to scan node.
+    @VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_STRING_MINMAX, needForward = 
true, description = {
+        "是否启用string类型min max下推。", "Set whether to enable push down string type 
minmax."})
+    public boolean enablePushDownStringMinMax = false;
+
     // Whether drop table when create table as select insert data appear error.
     @VariableMgr.VarAttr(name = DROP_TABLE_IF_CTAS_FAILED, needForward = true)
     public boolean dropTableIfCtasFailed = true;
@@ -2200,6 +2207,10 @@ public class SessionVariable implements Serializable, 
Writable {
         this.enablePushDownMinMaxOnUnique = enablePushDownMinMaxOnUnique;
     }
 
+    public boolean isEnablePushDownStringMinMax() {
+        return enablePushDownStringMinMax;
+    }
+
     /**
      * Nereids only support vectorized engine.
      *
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 503692673ed..9069843622b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -193,6 +193,7 @@ public class StatisticsUtil {
         sessionVariable.enableFileCache = false;
         sessionVariable.forbidUnknownColStats = false;
         sessionVariable.enablePushDownMinMaxOnUnique = true;
+        sessionVariable.enablePushDownStringMinMax = true;
         connectContext.setEnv(Env.getCurrentEnv());
         connectContext.setDatabase(FeConstants.INTERNAL_DB_NAME);
         connectContext.setQualifiedUser(UserIdentity.ROOT.getQualifiedUser());
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index 4038d2d6503..139ebff72c9 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -121,8 +121,6 @@ suite("test_analyze") {
         SET forbid_unknown_col_stats=true;
         """
 
-    Thread.sleep(1000 * 60)
-
 //    sql """
 //        SELECT * FROM ${tbl};
 //    """
@@ -2624,15 +2622,35 @@ PARTITION `p599` VALUES IN (599)
      );
    """
    sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
-   Thread.sleep(1000 * 90)
+   Thread.sleep(1000 * 60)
    sql """analyze table agg_table_test with sample rows 100 with sync"""
    def agg_result = sql """show column stats agg_table_test (name)"""
    assertEquals(agg_result[0][6], "N/A")
    assertEquals(agg_result[0][7], "N/A")
-   agg_result = sql """show column stats agg_table_test (id)"""
-   assertEquals(agg_result[0][6], "1")
-   assertEquals(agg_result[0][7], "2")
-   sql """DROP DATABASE IF EXISTS AggTableTest"""
+
+   // Test sample string type min max
+   sql """
+     CREATE TABLE `string_min_max` (
+      `id` BIGINT NOT NULL,
+      `name` string NULL
+     ) ENGINE=OLAP
+     DUPLICATE KEY(`id`)
+     COMMENT 'OLAP'
+     DISTRIBUTED BY HASH(`id`) BUCKETS 32
+     PROPERTIES (
+      "replication_num" = "1"
+     );
+   """
+   sql """insert into string_min_max values (1,'name1'), (2, 'name2')"""
+   explain {
+       sql("select min(name), max(name) from string_min_max")
+       contains "pushAggOp=NONE"
+   }
+   sql """set enable_pushdown_string_minmax = true"""
+   explain {
+       sql("select min(name), max(name) from string_min_max")
+       contains "pushAggOp=MINMAX"
+   }
 
    // Test trigger type.
    sql """DROP DATABASE IF EXISTS trigger"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to