This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 37640682198 branch-4.0: [fix](nereids) set correct hot value after analyze job #56403 (#56631)
37640682198 is described below

commit 3764068219864fcc1cf50628d04bc002de94fa5b
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Sep 30 08:57:22 2025 +0800

    branch-4.0: [fix](nereids) set correct hot value after analyze job #56403 (#56631)
    
    Cherry-picked from #56403
    
    Co-authored-by: minghong <[email protected]>
---
 .../org/apache/doris/statistics/ColStatsData.java  |  2 +-
 .../apache/doris/statistics/ColumnStatistic.java   |  2 +-
 .../agg_strategy/physical_agg_regulator.out        |  8 ++---
 .../stats/col_stats/hot_value_analyze_sync.groovy  | 36 ++++++++++++++++++++++
 .../agg_strategy/physical_agg_regulator.groovy     |  4 +--
 .../suites/statistics/test_hot_value.groovy        | 15 ++++-----
 6 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 53657fa4665..e68b13a4908 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -183,7 +183,7 @@ public class ColStatsData {
             columnStatisticBuilder.setUpdatedTime(updateTime);
             if (ndv > 0) {
                 
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(hotValues, 
col.getType(),
-                        (1 / ndv)));
+                        (1.0 / ndv)));
             }
             return columnStatisticBuilder.build();
         } catch (Exception e) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index e5e0ce14182..aaf7c01e56b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -208,7 +208,7 @@ public class ColumnStatistic {
         } else {
             if (ndv > 0) {
                 
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(row.get(14), 
col.getType(),
-                        1 / ndv));
+                        1.0 / ndv));
             }
         }
         return columnStatisticBuilder.build();
diff --git a/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out b/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
index 85c8a56e271..a33a83f01e0 100644
--- a/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
+++ b/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
@@ -29,15 +29,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------hashAgg[GLOBAL]
 ----------hashAgg[GLOBAL]
 ------------PhysicalDistribute[DistributionSpecHash]
---------------hashAgg[LOCAL]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
 --------hashAgg[GLOBAL]
 ----------hashAgg[GLOBAL]
 ------------PhysicalDistribute[DistributionSpecHash]
---------------hashAgg[LOCAL]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
 
 -- !request_deriver_parent_ndv_high --
 PhysicalResultSink
diff --git a/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy b/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy
new file mode 100644
index 00000000000..e12e8a2ba20
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite('hot_value_analyze_sync') {
+    sql """
+    drop table if exists t1025;
+    create table t1025(a_1 int, b_5 int, c_10 int, d_1025 int) distributed by hash(c_10) properties('replication_num'='1');
+    insert into t1025 select 1, number%5 , number%10, number from numbers("number"="100");
+    """
+    def passed = false
+    def retry = 100
+    while (!passed && retry-- > 0) {
+        sql "analyze table t1025 with sample rows 4000 with sync;"
+        def result = sql "show column cached stats t1025"
+        log.info(result.toString())
+        // check hot value for d_1025 is null
+        if (!result.isEmpty()) {
+            assertEquals(result[3][17].toString(), "null")
+            passed = true
+        }
+    }
+}
\ No newline at end of file
diff --git a/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy b/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
index 80a1e77186d..7b9f2c04f9a 100644
--- a/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
+++ b/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("physical_agg_regulator_request_deriver") {
+suite("physical_agg_regulator") {
     multi_sql"""
     SET ignore_shape_nodes='PhysicalProject';
     set runtime_filter_mode=OFF;
@@ -30,7 +30,7 @@ suite("physical_agg_regulator_request_deriver") {
     qt_skew """explain shape plan
     select count(distinct a_1), count(distinct b_5) from t1025_skew5000 group by d_1025;"""
 
-    // TODO 这个预期应该使用cte之后shuffle
+
     multi_sql """
     analyze table t1025 with sample rows 4000 with sync;
     --drop cached stats t1025;
diff --git a/regression-test/suites/statistics/test_hot_value.groovy b/regression-test/suites/statistics/test_hot_value.groovy
index 8b9d002229b..c4021f17555 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -52,7 +52,7 @@ suite("test_hot_value") {
     sql """create database test_hot_value"""
     sql """use test_hot_value"""
     sql """set global enable_auto_analyze=false"""
-    sql " set hot_value_threshold = 1"
+    sql " set hot_value_threshold = 0.1"
 
     sql """CREATE TABLE test1 (
             key1 int NULL,
@@ -152,20 +152,21 @@ suite("test_hot_value") {
     assertTrue(hotValues[0] == "'1':0.5" || hotValues[0] == "'0':0.5")
     assertTrue(hotValues[1] == "'1':0.5" || hotValues[1] == "'0':0.5")
 
-    sql """alter table test1 modify column value1 set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='aaa :0.22');"""
+    sql """alter table test1 modify column value1 set stats ('row_count'='100.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='aaa :0.8');"""
     result = sql """show column stats test1(value1)"""
     logger.info("3. result " + result)
     assertEquals(1, result.size())
-    assertEquals("5.0", result[0][2])
-    assertEquals("'aaa':0.22", result[0][17])
+    assertEquals("100.0", result[0][2])
+    assertEquals("'aaa':0.8", result[0][17])
     result = sql """show column cached stats test1(value1)"""
+    logger.info("3. cached result " + result)
     assertEquals(1, result.size())
-    assertEquals("5.0", result[0][2])
-    assertEquals("'aaa':0.22", result[0][17])
+    assertEquals("100.0", result[0][2])
+    assertEquals("'aaa':0.8", result[0][17])
     logger.info("2. memo plan ")
     explain {
         sql("memo plan select * from test1")
-        contains "hotValues=('aaa':0.22)"
+        contains "hotValues=('aaa':0.8)"
     }
 
     sql """alter table test1 modify column value1 set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='a \\\\;a \\\\:a :0.33');"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to