This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 37640682198 branch-4.0: [fix](nereids) set correct hot value after analyze job #56403 (#56631)
37640682198 is described below
commit 3764068219864fcc1cf50628d04bc002de94fa5b
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Sep 30 08:57:22 2025 +0800
branch-4.0: [fix](nereids) set correct hot value after analyze job #56403 (#56631)
Cherry-picked from #56403
Co-authored-by: minghong <[email protected]>
---
.../org/apache/doris/statistics/ColStatsData.java | 2 +-
.../apache/doris/statistics/ColumnStatistic.java | 2 +-
.../agg_strategy/physical_agg_regulator.out | 8 ++---
.../stats/col_stats/hot_value_analyze_sync.groovy | 36 ++++++++++++++++++++++
.../agg_strategy/physical_agg_regulator.groovy | 4 +--
.../suites/statistics/test_hot_value.groovy | 15 ++++-----
6 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 53657fa4665..e68b13a4908 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -183,7 +183,7 @@ public class ColStatsData {
columnStatisticBuilder.setUpdatedTime(updateTime);
if (ndv > 0) {
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(hotValues,
col.getType(),
- (1 / ndv)));
+ (1.0 / ndv)));
}
return columnStatisticBuilder.build();
} catch (Exception e) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index e5e0ce14182..aaf7c01e56b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -208,7 +208,7 @@ public class ColumnStatistic {
} else {
if (ndv > 0) {
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(row.get(14),
col.getType(),
- 1 / ndv));
+ 1.0 / ndv));
}
}
return columnStatisticBuilder.build();
diff --git a/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out b/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
index 85c8a56e271..a33a83f01e0 100644
--- a/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
+++ b/regression-test/data/nereids_rules_p0/agg_strategy/physical_agg_regulator.out
@@ -29,15 +29,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------hashAgg[GLOBAL]
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
---------------hashAgg[LOCAL]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------hashAgg[GLOBAL]
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
---------------hashAgg[LOCAL]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !request_deriver_parent_ndv_high --
PhysicalResultSink
diff --git a/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy b/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy
new file mode 100644
index 00000000000..e12e8a2ba20
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/col_stats/hot_value_analyze_sync.groovy
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite('hot_value_analyze_sync') {
+ sql """
+ drop table if exists t1025;
+ create table t1025(a_1 int, b_5 int, c_10 int, d_1025 int) distributed by hash(c_10) properties('replication_num'='1');
+ insert into t1025 select 1, number%5 , number%10, number from numbers("number"="100");
+ """
+ def passed = false
+ def retry = 100
+ while (!passed && retry-- > 0) {
+ sql "analyze table t1025 with sample rows 4000 with sync;"
+ def result = sql "show column cached stats t1025"
+ log.info(result.toString())
+ // check hot value for d_1025 is null
+ if (!result.isEmpty()) {
+ assertEquals(result[3][17].toString(), "null")
+ passed = true
+ }
+ }
+}
\ No newline at end of file
diff --git a/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy b/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
index 80a1e77186d..7b9f2c04f9a 100644
--- a/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
+++ b/regression-test/suites/nereids_rules_p0/agg_strategy/physical_agg_regulator.groovy
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-suite("physical_agg_regulator_request_deriver") {
+suite("physical_agg_regulator") {
multi_sql"""
SET ignore_shape_nodes='PhysicalProject';
set runtime_filter_mode=OFF;
@@ -30,7 +30,7 @@ suite("physical_agg_regulator_request_deriver") {
qt_skew """explain shape plan
select count(distinct a_1), count(distinct b_5) from t1025_skew5000 group by d_1025;"""
- // TODO 这个预期应该使用cte之后shuffle
+
multi_sql """
analyze table t1025 with sample rows 4000 with sync;
--drop cached stats t1025;
diff --git a/regression-test/suites/statistics/test_hot_value.groovy b/regression-test/suites/statistics/test_hot_value.groovy
index 8b9d002229b..c4021f17555 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -52,7 +52,7 @@ suite("test_hot_value") {
sql """create database test_hot_value"""
sql """use test_hot_value"""
sql """set global enable_auto_analyze=false"""
- sql " set hot_value_threshold = 1"
+ sql " set hot_value_threshold = 0.1"
sql """CREATE TABLE test1 (
key1 int NULL,
@@ -152,20 +152,21 @@ suite("test_hot_value") {
assertTrue(hotValues[0] == "'1':0.5" || hotValues[0] == "'0':0.5")
assertTrue(hotValues[1] == "'1':0.5" || hotValues[1] == "'0':0.5")
- sql """alter table test1 modify column value1 set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='aaa :0.22');"""
+ sql """alter table test1 modify column value1 set stats ('row_count'='100.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='aaa :0.8');"""
result = sql """show column stats test1(value1)"""
logger.info("3. result " + result)
assertEquals(1, result.size())
- assertEquals("5.0", result[0][2])
- assertEquals("'aaa':0.22", result[0][17])
+ assertEquals("100.0", result[0][2])
+ assertEquals("'aaa':0.8", result[0][17])
result = sql """show column cached stats test1(value1)"""
+ logger.info("3. cached result " + result)
assertEquals(1, result.size())
- assertEquals("5.0", result[0][2])
- assertEquals("'aaa':0.22", result[0][17])
+ assertEquals("100.0", result[0][2])
+ assertEquals("'aaa':0.8", result[0][17])
logger.info("2. memo plan ")
explain {
sql("memo plan select * from test1")
- contains "hotValues=('aaa':0.22)"
+ contains "hotValues=('aaa':0.8)"
}
sql """alter table test1 modify column value1 set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='a \\\\;a \\\\:a :0.33');"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]