This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3f504174c4c [improve](statistics)Add hot values to column statistics
to string. (#52651)
3f504174c4c is described below
commit 3f504174c4ca3c84879c7d6d4e34a948f20c44ab
Author: James <[email protected]>
AuthorDate: Fri Jul 4 10:49:58 2025 +0800
[improve](statistics)Add hot values to column statistics to string. (#52651)
### What problem does this PR solve?
Add hot values to the column statistics toString() output, so that the hot
values can be printed in the memo plan.
---
.../java/org/apache/doris/statistics/ColumnStatistic.java | 5 +++--
regression-test/suites/statistics/test_hot_value.groovy | 12 ++++++++++++
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index ff4c127b4ef..42f603f5f53 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -263,8 +263,9 @@ public class ColumnStatistic {
@Override
public String toString() {
return isUnKnown ? "unknown(" + count + ")"
- : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f, numNulls=%.4f, avgSizeByte=%f",
- ndv, minValue, minExpr, maxValue, maxExpr, count, numNulls, avgSizeByte);
+ : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f, numNulls=%.4f, "
+ + "avgSizeByte=%f, hotValues=(%s)",
+ ndv, minValue, minExpr, maxValue, maxExpr, count, numNulls, avgSizeByte, getStringHotValues());
}
public JSONObject toJson() {
diff --git a/regression-test/suites/statistics/test_hot_value.groovy b/regression-test/suites/statistics/test_hot_value.groovy
index 39f59524511..596c295f88e 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -80,6 +80,10 @@ suite("test_hot_value") {
wait_row_count_reported("test_hot_value", "test1", 0, 4, "10000")
wait_row_count_reported("test_hot_value", "test2", 0, 4, "10000")
sql """analyze table test1 with sync"""
+ explain {
+ sql("memo plan select * from test1")
+ contains "hotValues=(null)"
+ }
def result = sql """show column stats test1(key1)"""
assertEquals(1, result.size())
assertEquals("10000.0", result[0][2])
@@ -152,6 +156,10 @@ suite("test_hot_value") {
assertEquals(1, result.size())
assertEquals("5.0", result[0][2])
assertEquals("aaa:22.33", result[0][17])
+ explain {
+ sql("memo plan select * from test1")
+ contains "hotValues=(aaa:22.33)"
+ }
sql """alter table test1 modify column value1 set stats
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0',
'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='a \\\\;a \\\\:a
:22.33');"""
result = sql """show column stats test1(value1)"""
@@ -162,6 +170,10 @@ suite("test_hot_value") {
assertEquals(1, result.size())
assertEquals("5.0", result[0][2])
assertEquals("a ;a :a:22.33", result[0][17])
+ explain {
+ sql("memo plan select * from test1")
+ contains "hotValues=(a ;a :a:22.33)"
+ }
sql """analyze table test2 with sample rows 100 with sync"""
result = sql """show column stats test2(value1)"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]