This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fdf9281e847 [improve](statistics)Support sample column skew info. 
(#52048)
fdf9281e847 is described below

commit fdf9281e847d825987d0ae418a90d0a77f4ca36e
Author: James <[email protected]>
AuthorDate: Wed Jul 2 09:54:54 2025 +0800

    [improve](statistics)Support sample column skew info. (#52048)
    
    ### What problem does this PR solve?
    
    Support sample column skew info.
    We collect top 3 values with their row count percentage of a column when
    doing sample analyzing, and store the result as a string to
    column_statistics table.
---
 .../org/apache/doris/analysis/AddColumnClause.java |   4 +
 .../apache/doris/analysis/ShowColumnStatsStmt.java |   2 +
 .../org/apache/doris/catalog/InternalSchema.java   |   2 +
 .../doris/catalog/InternalSchemaInitializer.java   |  28 +++-
 .../plans/commands/AlterColumnStatsCommand.java    |   1 +
 .../apache/doris/statistics/BaseAnalysisTask.java  | 142 +++++++++-------
 .../org/apache/doris/statistics/ColStatsData.java  |  10 +-
 .../apache/doris/statistics/ColumnStatistic.java   |  33 +++-
 .../doris/statistics/ColumnStatisticBuilder.java   |  18 +-
 .../apache/doris/statistics/HMSAnalysisTask.java   |   4 +-
 .../apache/doris/statistics/OlapAnalysisTask.java  |   5 +-
 .../PartitionColumnStatisticBuilder.java           |   2 +-
 .../apache/doris/statistics/StatisticsCache.java   |   2 +-
 .../doris/statistics/StatisticsRepository.java     |   8 +-
 .../org/apache/doris/statistics/StatsType.java     |   1 +
 .../doris/statistics/util/StatisticsUtil.java      |  33 ++++
 .../catalog/InternalSchemaInitializerTest.java     |  27 ++-
 .../doris/nereids/util/HyperGraphBuilder.java      |   2 +-
 .../apache/doris/statistics/AnalysisJobTest.java   |   2 +-
 .../doris/statistics/BaseAnalysisTaskTest.java     |   8 +-
 .../org/apache/doris/statistics/CacheTest.java     |   4 +-
 .../apache/doris/statistics/ColStatsDataTest.java  | 139 ++++++++--------
 .../doris/statistics/HMSAnalysisTaskTest.java      |  24 +--
 .../doris/statistics/OlapAnalysisTaskTest.java     |  59 ++++---
 .../org/apache/doris/statistics/StatsMockUtil.java |   1 +
 .../doris/statistics/util/StatisticsUtilTest.java  |  44 +++++
 .../hive/test_hive_statistic.groovy                |   2 +-
 regression-test/suites/statistics/test_ddl.groovy  |   2 +-
 .../suites/statistics/test_hot_value.groovy        | 184 +++++++++++++++++++++
 29 files changed, 589 insertions(+), 204 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AddColumnClause.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AddColumnClause.java
index 837acb5b51e..f7ae9083502 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AddColumnClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AddColumnClause.java
@@ -153,4 +153,8 @@ public class AddColumnClause extends AlterTableClause {
     public String toString() {
         return toSql();
     }
+
+    public void setColumn(Column column) {
+        this.column = column;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index aa840dc452b..aec15432fbd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -72,6 +72,7 @@ public class ShowColumnStatsStmt extends ShowStmt implements 
NotFallbackInParser
                     .add("update_rows")
                     .add("last_analyze_row_count")
                     .add("last_analyze_version")
+                    .add("hot_values")
                     .build();
 
     private static final ImmutableList<String> PARTITION_COLUMN_TITLE_NAMES =
@@ -188,6 +189,7 @@ public class ShowColumnStatsStmt extends ShowStmt 
implements NotFallbackInParser
             row.add(String.valueOf(colStatsMeta == null ? "N/A" : 
colStatsMeta.updatedRows));
             row.add(String.valueOf(colStatsMeta == null ? "N/A" : 
colStatsMeta.rowCount));
             row.add(String.valueOf(colStatsMeta == null ? "N/A" : 
colStatsMeta.tableVersion));
+            row.add(String.valueOf(colStatsMeta == null ? "N/A" : 
p.second.getStringHotValues()));
             result.add(row);
         });
         return new ShowResultSet(getMetaData(), result);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java
index 9940bedbe49..81d7e7bfc44 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java
@@ -67,6 +67,8 @@ public class InternalSchema {
                 new ColumnDef("data_size_in_bytes", 
TypeDef.create(PrimitiveType.BIGINT), ColumnNullableType.NULLABLE));
         TABLE_STATS_SCHEMA.add(
                 new ColumnDef("update_time", 
TypeDef.create(PrimitiveType.DATETIME), ColumnNullableType.NOT_NULLABLE));
+        TABLE_STATS_SCHEMA.add(
+                new ColumnDef("hot_value", 
TypeDef.create(PrimitiveType.STRING), ColumnNullableType.NULLABLE));
 
         // partition statistics table
         PARTITION_STATS_SCHEMA = new ArrayList<>();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
index 933272a5dbc..1444a4febaa 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.catalog;
 
+import org.apache.doris.analysis.AddColumnClause;
 import org.apache.doris.analysis.AlterClause;
 import org.apache.doris.analysis.AlterTableStmt;
 import org.apache.doris.analysis.ColumnDef;
@@ -62,12 +63,14 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 
 public class InternalSchemaInitializer extends Thread {
 
     private static final Logger LOG = 
LogManager.getLogger(InternalSchemaInitializer.class);
+    private static boolean StatsTableSchemaValid = false;
 
     public InternalSchemaInitializer() {
         super("InternalSchemaInitializer");
@@ -135,6 +138,7 @@ public class InternalSchemaInitializer extends Thread {
                 // IGNORE
             }
         }
+        StatsTableSchemaValid = true;
     }
 
     public Table findStatsTable() {
@@ -160,9 +164,25 @@ public class InternalSchemaInitializer extends Thread {
         }
     }
 
-    public List<AlterClause> getModifyColumnClauses(Table table) {
+    public List<AlterClause> getModifyColumnClauses(Table table) throws 
AnalysisException {
         List<AlterClause> clauses = Lists.newArrayList();
-        for (Column col : table.fullSchema) {
+        Set<String> currentColumnNames = table.getBaseSchema().stream()
+                .map(Column::getName)
+                .map(String::toLowerCase)
+                .collect(Collectors.toSet());
+        if 
(!currentColumnNames.containsAll(InternalSchema.TABLE_STATS_SCHEMA.stream()
+                .map(ColumnDef::getName)
+                .map(String::toLowerCase)
+                .collect(Collectors.toList()))) {
+            for (ColumnDef expected : InternalSchema.TABLE_STATS_SCHEMA) {
+                if 
(!currentColumnNames.contains(expected.getName().toLowerCase())) {
+                    AddColumnClause addColumnClause = new 
AddColumnClause(expected, null, null, null);
+                    addColumnClause.setColumn(expected.toColumn());
+                    clauses.add(addColumnClause);
+                }
+            }
+        }
+        for (Column col : table.getFullSchema()) {
             if (col.isKey() && col.getType().isVarchar()
                     && col.getType().getLength() < 
StatisticConstants.MAX_NAME_LEN) {
                 TypeDef typeDef = new TypeDef(
@@ -422,4 +442,8 @@ public class InternalSchemaInitializer extends Thread {
         }
         return true;
     }
+
+    public static boolean isStatsTableSchemaValid() {
+        return StatsTableSchemaValid;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterColumnStatsCommand.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterColumnStatsCommand.java
index accf55f13c6..ebfe1f73f37 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterColumnStatsCommand.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterColumnStatsCommand.java
@@ -74,6 +74,7 @@ public class AlterColumnStatsCommand extends AlterCommand {
             .add(ColumnStatistic.MIN_VALUE)
             .add(ColumnStatistic.MAX_VALUE)
             .add(StatsType.DATA_SIZE)
+            .add(StatsType.HOT_VALUES)
             .build();
     private final TableNameInfo tableNameInfo;
     private final String indexName;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 256309c4792..1613230bb9a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -66,62 +66,88 @@ public abstract class BaseAnalysisTask {
 
     protected static final String FULL_ANALYZE_TEMPLATE =
             "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, "
-            + "         ${catalogId} AS `catalog_id`, "
-            + "         ${dbId} AS `db_id`, "
-            + "         ${tblId} AS `tbl_id`, "
-            + "         ${idxId} AS `idx_id`, "
-            + "         '${colId}' AS `col_id`, "
-            + "         NULL AS `part_id`, "
-            + "         COUNT(1) AS `row_count`, "
-            + "         NDV(`${colName}`) AS `ndv`, "
-            + "         COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
-            + "         SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) 
AS `min`, "
-            + "         SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) 
AS `max`, "
-            + "         ${dataSizeFunction} AS `data_size`, "
-            + "         NOW() AS `update_time` "
-            + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index}";
-
-    protected static final String LINEAR_ANALYZE_TEMPLATE = "SELECT "
-            + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, "
-            + "${catalogId} AS `catalog_id`, "
-            + "${dbId} AS `db_id`, "
-            + "${tblId} AS `tbl_id`, "
-            + "${idxId} AS `idx_id`, "
-            + "'${colId}' AS `col_id`, "
-            + "NULL AS `part_id`, "
-            + "${rowCount} AS `row_count`, "
-            + "${ndvFunction} as `ndv`, "
-            + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 
${scaleFactor}) AS `null_count`, "
-            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
-            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
-            + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
-            + "NOW() FROM ( "
-            + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` 
${index} ${sampleHints} ${limit}) "
-            + "as t ${preAggHint}";
-
-    protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT "
-            + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
-            + "${catalogId} AS `catalog_id`, "
-            + "${dbId} AS `db_id`, "
-            + "${tblId} AS `tbl_id`, "
-            + "${idxId} AS `idx_id`, "
-            + "'${colId}' AS `col_id`, "
-            + "NULL AS `part_id`, "
-            + "${rowCount} AS `row_count`, "
-            + "${ndvFunction} as `ndv`, "
-            + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) 
* ${scaleFactor} as `null_count`, "
-            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
-            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
-            + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
-            + "NOW() "
-            + "FROM ( "
-            + "    SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, 
SUM(`len`) as `column_length` "
-            + "    FROM "
-            + "        (SELECT ${subStringColName} AS `colValue`, 
LENGTH(`${colName}`) as `len` "
-            + "        FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} 
${sampleHints} ${limit}) as `t0`"
-            + "        ${preAggHint}"
-            + "    GROUP BY `t0`.`colValue` "
-            + ") as `t1` ";
+            +     "${catalogId} AS `catalog_id`, "
+            +     "${dbId} AS `db_id`, "
+            +     "${tblId} AS `tbl_id`, "
+            +     "${idxId} AS `idx_id`, "
+            +     "'${colId}' AS `col_id`, "
+            +     "NULL AS `part_id`, "
+            +     "COUNT(1) AS `row_count`, "
+            +     "NDV(`${colName}`) AS `ndv`, "
+            +     "COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
+            +     "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS 
`min`, "
+            +     "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS 
`max`, "
+            +     "${dataSizeFunction} AS `data_size`, "
+            +     "NOW() AS `update_time`, "
+            +     "null as `hot_value` "
+            + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index}";
+
+    protected static final String LINEAR_ANALYZE_TEMPLATE = "WITH cte1 AS ("
+            +     "SELECT `${colName}` "
+            +     "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} 
${sampleHints} ${limit} ${preAggHint}), "
+            + "cte2 AS ("
+            +     "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS 
`id`, "
+            +     "${catalogId} AS `catalog_id`, "
+            +     "${dbId} AS `db_id`, "
+            +     "${tblId} AS `tbl_id`, "
+            +     "${idxId} AS `idx_id`, "
+            +     "'${colId}' AS `col_id`, "
+            +     "NULL AS `part_id`, "
+            +     "${rowCount} AS `row_count`, "
+            +     "${ndvFunction} as `ndv`, "
+            +     "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) 
* ${scaleFactor}) AS `null_count`, "
+            +     "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            +     "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+            +     "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+            +     "NOW() FROM cte1), "
+            + "cte3 AS ("
+            +     "SELECT GROUP_CONCAT(CONCAT("
+            +         "REPLACE(REPLACE(t.`column_key`, \":\", \"\\\\:\"), 
\";\", \"\\\\;\"), "
+            +         "\" :\", ROUND(t.`count` * 100.0 / ${rowCount2}, 2)), \" 
;\") "
+            +         "as `hot_value` "
+            +     "FROM ("
+            +         "SELECT ${subStringColName} as `hash_value`, "
+            +         "MAX(`${colName}`) as `column_key`, "
+            +         "COUNT(1) AS `count` "
+            +         "FROM cte1 WHERE `${colName}` IS NOT NULL "
+            +         "GROUP BY `hash_value` ORDER BY `count` DESC LIMIT 3) t) 
"
+            + "SELECT * FROM cte2 CROSS JOIN cte3";
+
+    protected static final String DUJ1_ANALYZE_TEMPLATE = "WITH cte1 AS ("
+            + "SELECT MAX(t0.`col_value`) as `col_value`, COUNT(1) as `count`, 
SUM(`len`) as `column_length` "
+            + "FROM "
+            +     "(SELECT "
+            +     "${subStringColName} AS `hash_value`, "
+            +     "`${colName}` AS `col_value`, "
+            +     "LENGTH(`${colName}`) as `len` "
+            +     "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} 
${sampleHints} ${limit}) as `t0` "
+            +     "${preAggHint} GROUP BY `t0`.`hash_value`), "
+            + "cte2 AS ( "
+            +     "SELECT CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') 
AS `id`, "
+            +     "${catalogId} AS `catalog_id`, "
+            +     "${dbId} AS `db_id`, "
+            +     "${tblId} AS `tbl_id`, "
+            +     "${idxId} AS `idx_id`, "
+            +     "'${colId}' AS `col_id`, "
+            +     "NULL AS `part_id`, "
+            +     "${rowCount} AS `row_count`, "
+            +     "${ndvFunction} as `ndv`, "
+            +     "IFNULL(SUM(IF(`t1`.`col_value` IS NULL, `t1`.`count`, 0)), 
0) * ${scaleFactor} as `null_count`, "
+            +     "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            +     "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+            +     "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+            +     "NOW() "
+            +     "FROM cte1 t1), "
+            + "cte3 AS ("
+            +     "SELECT GROUP_CONCAT(CONCAT("
+            +         "REPLACE(REPLACE(t2.`col_value`, \":\", \"\\\\:\"), 
\";\", \"\\\\;\"), "
+            +         "\" :\", ROUND(t2.`count` * 100.0 / ${rowCount2}, 2)), 
\" ;\") "
+            +         "as `hot_value` "
+            +     "FROM ("
+            +         "SELECT `col_value`, `count` "
+            +             "FROM cte1 "
+            +             "WHERE `col_value` IS NOT NULL ORDER BY `count` DESC 
LIMIT 3) t2) "
+            + "SELECT * FROM cte2 CROSS JOIN cte3";
 
     protected static final String ANALYZE_PARTITION_COLUMN_TEMPLATE = " SELECT 
"
             + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, "
@@ -294,8 +320,8 @@ public abstract class BaseAnalysisTask {
 
     protected String getNdvFunction(String totalRows) {
         String n = "SUM(`t1`.`count`)"; // sample rows
-        String f1 = "SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not 
null, 1, 0))";
-        String d = "COUNT(`t1`.`column_key`)"; // sample ndv
+        String f1 = "SUM(IF(`t1`.`count` = 1 and `t1`.`col_value` is not null, 
1, 0))";
+        String d = "COUNT(`t1`.`col_value`)"; // sample ndv
         // DUJ1 estimator: n*d / (n - f1 + f1*n/N)
         // f1 is the count of element that appears only once in the sample.
         // 
(https://github.com/postgres/postgres/blob/master/src/backend/commands/analyze.c)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 329d39eb5ee..232ae506d71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -66,6 +66,8 @@ public class ColStatsData {
     public final long dataSizeInBytes;
     @SerializedName("updateTime")
     public final String updateTime;
+    @SerializedName("hotValues")
+    public final String hotValues;
 
     @VisibleForTesting
     public ColStatsData() {
@@ -77,6 +79,7 @@ public class ColStatsData {
         maxLit = null;
         dataSizeInBytes = 0;
         updateTime = null;
+        hotValues = null;
     }
 
     public ColStatsData(StatsId statsId) {
@@ -88,6 +91,7 @@ public class ColStatsData {
         maxLit = null;
         dataSizeInBytes = 0;
         updateTime = 
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+        hotValues = null;
     }
 
     public ColStatsData(ResultRow row) {
@@ -99,10 +103,11 @@ public class ColStatsData {
         this.maxLit = row.get(11);
         this.dataSizeInBytes = (long) 
Double.parseDouble(row.getWithDefault(12, "0"));
         this.updateTime = row.get(13);
+        this.hotValues = row.get(14);
     }
 
     public ColStatsData(String id, long catalogId, long dbId, long tblId, long 
idxId, String colId, String partId,
-                        ColumnStatistic columnStatistic) {
+                        String hotValues, ColumnStatistic columnStatistic) {
         this.statsId = new StatsId(id, catalogId, dbId, tblId, idxId, colId, 
partId);
         this.count = Math.round(columnStatistic.count);
         this.ndv = Math.round(columnStatistic.ndv);
@@ -111,6 +116,7 @@ public class ColStatsData {
         this.maxLit = columnStatistic.maxExpr == null ? null : 
columnStatistic.maxExpr.getStringValue();
         this.dataSizeInBytes = Math.round(columnStatistic.dataSize);
         this.updateTime = columnStatistic.updatedTime;
+        this.hotValues = hotValues;
     }
 
     public String toSQL(boolean roundByParentheses) {
@@ -127,6 +133,7 @@ public class ColStatsData {
         sj.add(maxLit == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(maxLit) + "'");
         sj.add(String.valueOf(dataSizeInBytes));
         sj.add(StatisticsUtil.quote(updateTime));
+        sj.add(hotValues == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(hotValues) + "'");
         return sj.toString();
     }
 
@@ -174,6 +181,7 @@ public class ColStatsData {
                 columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
             }
             columnStatisticBuilder.setUpdatedTime(updateTime);
+            
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(hotValues, 
col.getType()));
             return columnStatisticBuilder.build();
         } catch (Exception e) {
             LOG.warn("Failed to convert column statistics.", e);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index e0e99fcdfad..ff4c127b4ef 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -34,6 +34,7 @@ import org.apache.logging.log4j.Logger;
 import org.json.JSONObject;
 
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 public class ColumnStatistic {
@@ -94,10 +95,13 @@ public class ColumnStatistic {
     @SerializedName("updatedTime")
     public final String updatedTime;
 
+    @SerializedName("hotValues")
+    public final Map<LiteralExpr, Float> hotValues;
+
     public ColumnStatistic(double count, double ndv, ColumnStatistic original, 
double avgSizeByte,
             double numNulls, double dataSize, double minValue, double maxValue,
             LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown,
-            String updatedTime) {
+            String updatedTime, Map<LiteralExpr, Float> hotValues) {
         this.count = count;
         this.ndv = ndv;
         this.original = original;
@@ -110,6 +114,7 @@ public class ColumnStatistic {
         this.maxExpr = maxExpr;
         this.isUnKnown = isUnKnown;
         this.updatedTime = updatedTime;
+        this.hotValues = hotValues;
     }
 
     public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
@@ -187,6 +192,7 @@ public class ColumnStatistic {
             columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
         }
         columnStatisticBuilder.setUpdatedTime(row.get(13));
+        
columnStatisticBuilder.setHotValues(StatisticsUtil.getHotValues(row.get(14), 
col.getType()));
         return columnStatisticBuilder.build();
     }
 
@@ -298,7 +304,7 @@ public class ColumnStatistic {
     // Histogram is got by other place
     public static ColumnStatistic fromJson(String statJson) throws 
AnalysisException {
         JSONObject stat = new JSONObject(statJson);
-        Double minValue;
+        double minValue;
         switch (stat.getString("MinValueType")) {
             case "Infinite":
                 minValue = Double.NEGATIVE_INFINITY;
@@ -312,7 +318,7 @@ public class ColumnStatistic {
             default:
                 throw new RuntimeException(String.format("Min value does not 
get anytype"));
         }
-        Double maxValue;
+        double maxValue;
         switch (stat.getString("MaxValueType")) {
             case "Infinite":
                 maxValue = Double.POSITIVE_INFINITY;
@@ -330,7 +336,6 @@ public class ColumnStatistic {
         try {
             lastUpdatedTime = stat.getString("LastUpdatedTime");
         } catch (Exception e) {
-            LOG.warn("lastUpdateTimeIsEmpty", e.getMessage());
             if (LOG.isDebugEnabled()) {
                 LOG.debug(e);
             }
@@ -349,7 +354,8 @@ public class ColumnStatistic {
             LiteralExpr.create(stat.getString("MaxExprValue"),
                     GsonUtils.GSON.fromJson(stat.getString("MaxExprType"), 
Type.class)),
             stat.getBoolean("IsUnKnown"),
-            lastUpdatedTime
+            lastUpdatedTime,
+            null
         );
     }
 
@@ -395,4 +401,21 @@ public class ColumnStatistic {
                     .build();
         }
     }
+
+    public String getStringHotValues() {
+        if (hotValues == null || hotValues.isEmpty()) {
+            return null;
+        }
+        StringBuilder sb = new StringBuilder();
+        hotValues.forEach((k, v) -> {
+            sb.append(k.toString());
+            sb.append(":");
+            sb.append(v);
+            sb.append(";");
+        });
+        if (sb.length() > 0) {
+            sb.setLength(sb.length() - 1);
+        }
+        return sb.toString();
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
index 4e190ce388e..0b98a3e61f9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
@@ -21,6 +21,8 @@ import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.types.coercion.CharacterType;
 
+import java.util.Map;
+
 public class ColumnStatisticBuilder {
     private double count;
     private double ndv;
@@ -37,6 +39,7 @@ public class ColumnStatisticBuilder {
     private ColumnStatistic original;
 
     private String updatedTime;
+    private Map<LiteralExpr, Float> hotValues;
 
     public ColumnStatisticBuilder() {
     }
@@ -54,6 +57,7 @@ public class ColumnStatisticBuilder {
         this.isUnknown = columnStatistic.isUnKnown;
         this.original = columnStatistic.original;
         this.updatedTime = columnStatistic.updatedTime;
+        this.hotValues = columnStatistic.hotValues;
     }
 
     // ATTENTION: DON'T USE FOLLOWING TWO DURING STATS DERIVING EXCEPT FOR 
INITIALIZATION
@@ -74,6 +78,7 @@ public class ColumnStatisticBuilder {
         this.isUnknown = columnStatistic.isUnKnown;
         this.original = columnStatistic.original;
         this.updatedTime = columnStatistic.updatedTime;
+        this.hotValues = columnStatistic.hotValues;
     }
 
     public ColumnStatisticBuilder setNdv(double ndv) {
@@ -126,6 +131,11 @@ public class ColumnStatisticBuilder {
         return this;
     }
 
+    public ColumnStatisticBuilder setHotValues(Map<LiteralExpr, Float> 
hotValues) {
+        this.hotValues = hotValues;
+        return this;
+    }
+
     public double getCount() {
         return count;
     }
@@ -175,16 +185,20 @@ public class ColumnStatisticBuilder {
         return this;
     }
 
+    public Map<LiteralExpr, Float> getHotValues() {
+        return hotValues;
+    }
+
     public ColumnStatistic build() {
         dataSize = dataSize > 0 ? dataSize : Math.max((count - numNulls + 1) * 
avgSizeByte, 0);
         if (original == null && !isUnknown) {
             original = new ColumnStatistic(count, ndv, null, avgSizeByte, 
numNulls,
                     dataSize, minValue, maxValue, minExpr, maxExpr,
-                    isUnknown, updatedTime);
+                    isUnknown, updatedTime, hotValues);
         }
         ColumnStatistic colStats = new ColumnStatistic(count, ndv, original, 
avgSizeByte, numNulls,
                 dataSize, minValue, maxValue, minExpr, maxExpr,
-                isUnknown, updatedTime);
+                isUnknown, updatedTime, hotValues);
         return colStats;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 06009b38177..2a80a9995cb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -248,13 +248,15 @@ public class HMSAnalysisTask extends ExternalAnalysisTask 
{
             bucketFlag = true;
             sb.append(LINEAR_ANALYZE_TEMPLATE);
             params.put("ndvFunction", "ROUND(NDV(`${colName}`) * 
${scaleFactor})");
-            params.put("rowCount", "ROUND(count(1) * ${scaleFactor})");
+            params.put("rowCount", "ROUND(COUNT(1) * ${scaleFactor})");
+            params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE 
`${colName}` IS NOT NULL)");
         } else {
             sb.append(DUJ1_ANALYZE_TEMPLATE);
             params.put("subStringColName", getStringTypeColName(col));
             params.put("dataSizeFunction", getDataSizeFunction(col, true));
             params.put("ndvFunction", getNdvFunction("ROUND(SUM(t1.count) * 
${scaleFactor})"));
             params.put("rowCount", "ROUND(SUM(t1.count) * ${scaleFactor})");
+            params.put("rowCount2", "(SELECT SUM(`count`) FROM cte1 WHERE 
`col_value` IS NOT NULL)");
         }
         LOG.info("Sample for column [{}]. Scale factor [{}], "
                 + "limited [{}], is distribute column [{}]",
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 43990f42d0b..bcb61f38e71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -256,6 +256,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         params.put("rowCount", String.valueOf(tableRowCount));
         params.put("type", col.getType().toString());
         params.put("limit", "");
+        params.put("subStringColName", getStringTypeColName(col));
 
         // For agg table and mor unique table, set PREAGGOPEN preAggHint.
         if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS)
@@ -269,6 +270,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             params.put("scaleFactor", "1");
             params.put("sampleHints", "");
             params.put("ndvFunction", "ROUND(NDV(`${colName}`) * 
${scaleFactor})");
+            params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE 
`${colName}` IS NOT NULL)");
             scanFullTable = true;
             return;
         }
@@ -299,6 +301,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         }
         // Set algorithm related params.
         if (useLinearAnalyzeTemplate()) {
+            params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE 
`${colName}` IS NOT NULL)");
             // For single unique key, use count as ndv.
             if (isSingleUniqueKey()) {
                 params.put("ndvFunction", String.valueOf(tableRowCount));
@@ -308,7 +311,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         } else {
             params.put("ndvFunction", 
getNdvFunction(String.valueOf(tableRowCount)));
             params.put("dataSizeFunction", getDataSizeFunction(col, true));
-            params.put("subStringColName", getStringTypeColName(col));
+            params.put("rowCount2", "(SELECT SUM(`count`) FROM cte1 WHERE 
`col_value` IS NOT NULL)");
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
index b1dc7cdd001..99fb28162e4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
@@ -174,6 +174,6 @@ public class PartitionColumnStatisticBuilder {
 
     public ColumnStatistic toColumnStatistics() {
         return new ColumnStatistic(count, ndv.estimateCardinality(), null,
-                avgSizeByte, numNulls, dataSize, minValue, maxValue, minExpr, 
maxExpr, isUnknown, updatedTime);
+                avgSizeByte, numNulls, dataSize, minValue, maxValue, minExpr, 
maxExpr, isUnknown, updatedTime, null);
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index 10a96ef13b0..59ae6fbf324 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -241,7 +241,7 @@ public class StatisticsCache {
         long retryTimes = 0;
         while (!StatisticsUtil.statsTblAvailable()) {
             try {
-                Thread.sleep(100L);
+                Thread.sleep(1000L);
             } catch (InterruptedException e) {
                 // IGNORE
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 5ebe51f8091..0780e88714f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -85,7 +85,8 @@ public class StatisticsRepository {
     private static final String INSERT_INTO_COLUMN_STATISTICS_FOR_ALTER =
             StatisticConstants.INSERT_INTO_COLUMN_STATS_PREFIX
                     + "('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}',"
-                    + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, 
${min}, ${max}, ${dataSize}, NOW())";
+                    + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, 
${min}, ${max}, ${dataSize}, NOW(), "
+                    + "${hotValues})";
 
     private static final String DELETE_TABLE_STATISTICS_BY_COLUMN_TEMPLATE = 
"DELETE FROM "
             + FeConstants.INTERNAL_DB_NAME + "." + 
StatisticConstants.TABLE_STATISTIC_TBL_NAME
@@ -323,6 +324,7 @@ public class StatisticsRepository {
         String min = alterColumnStatsCommand.getValue(StatsType.MIN_VALUE);
         String max = alterColumnStatsCommand.getValue(StatsType.MAX_VALUE);
         String dataSize = 
alterColumnStatsCommand.getValue(StatsType.DATA_SIZE);
+        String hotValues = 
alterColumnStatsCommand.getValue(StatsType.HOT_VALUES);
         long indexId = alterColumnStatsCommand.getIndexId();
         if (rowCount == null) {
             throw new RuntimeException("Row count is null.");
@@ -356,6 +358,7 @@ public class StatisticsRepository {
                 }
             }
         }
+        builder.setHotValues(StatisticsUtil.getHotValues(hotValues, 
column.getType()));
 
         ColumnStatistic columnStatistic = builder.build();
         Map<String, String> params = new HashMap<>();
@@ -371,6 +374,7 @@ public class StatisticsRepository {
         params.put("min", min == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(min) + "'");
         params.put("max", max == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(max) + "'");
         params.put("dataSize", String.valueOf(columnStatistic.dataSize));
+        params.put("hotValues", "'" + StatisticsUtil.escapeSQL(hotValues) + 
"'");
 
         if (partitionIds.isEmpty()) {
             // update table granularity statistics
@@ -378,7 +382,7 @@ public class StatisticsRepository {
             StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS_FOR_ALTER, 
params);
             ColStatsData data = new 
ColStatsData(constructId(objects.table.getId(), indexId, colName),
                     objects.catalog.getId(), objects.db.getId(), 
objects.table.getId(), indexId, colName,
-                    null, columnStatistic);
+                    null, hotValues, columnStatistic);
             Env.getCurrentEnv().getStatisticsCache().syncColStats(data);
             AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
                     .setTblUpdateTime(objects.table.getUpdateTime())
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsType.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsType.java
index d59f85285a6..a14e808afba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsType.java
@@ -27,6 +27,7 @@ public enum StatsType {
     MIN_VALUE("min_value"),
     MAX_VALUE("max_value"),
     HISTOGRAM("histogram"),
+    HOT_VALUES("hot_values"),
     // only for test
     UNKNOWN("unknown");
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 188a73ab5c8..b6565f1dbaf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.InternalSchemaInitializer;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.MapType;
 import org.apache.doris.catalog.OlapTable;
@@ -87,6 +88,7 @@ import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.system.Frontend;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.text.StringSubstitutor;
@@ -111,6 +113,7 @@ import java.util.Base64;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -502,6 +505,9 @@ public class StatisticsUtil {
     }
 
     public static boolean statsTblAvailable() {
+        if (!InternalSchemaInitializer.isStatsTableSchemaValid()) {
+            return false;
+        }
         String dbName = FeConstants.INTERNAL_DB_NAME;
         List<OlapTable> statsTbls = new ArrayList<>();
         try {
@@ -1263,4 +1269,31 @@ public class StatisticsUtil {
     public static boolean canCollect() {
         return enableAutoAnalyze() && 
inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId()));
     }
+
+    /**
+     * Get the map of column literal value and its row count percentage in the 
table.
+     * The stringValues is like:
+     * value1 :percent1 ;value2 :percent2 ;value3 :percent3
+     * @return Map of LiteralExpr -> percentage.
+     */
+    public static LinkedHashMap<LiteralExpr, Float> getHotValues(String 
stringValues, Type type) {
+        if (stringValues == null) {
+            return null;
+        }
+        try {
+            LinkedHashMap<LiteralExpr, Float> ret = Maps.newLinkedHashMap();
+            for (String oneRow : stringValues.split(" ;")) {
+                String[] oneRowSplit = oneRow.split(" :");
+                float value = Float.parseFloat(oneRowSplit[1]);
+                String stringLiteral = oneRowSplit[0].replaceAll("\\\\:", 
":").replaceAll("\\\\;", ";");
+                ret.put(readableValue(type, stringLiteral), value);
+            }
+            if (!ret.isEmpty()) {
+                return ret;
+            }
+        } catch (Exception e) {
+            LOG.info("Failed to parse hot values [{}]. {}", stringValues, 
e.getMessage());
+        }
+        return null;
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/InternalSchemaInitializerTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/InternalSchemaInitializerTest.java
index c71c1e1c1b6..a05039307b5 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/InternalSchemaInitializerTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/InternalSchemaInitializerTest.java
@@ -22,33 +22,25 @@ import org.apache.doris.analysis.ColumnDef;
 import org.apache.doris.analysis.ColumnPosition;
 import org.apache.doris.analysis.CreateTableStmt;
 import org.apache.doris.analysis.ModifyColumnClause;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
-import org.apache.doris.datasource.hive.HMSExternalTable;
 import org.apache.doris.plugin.audit.AuditLoader;
 import org.apache.doris.statistics.StatisticConstants;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import mockit.Mock;
-import mockit.MockUp;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
 
 import java.lang.reflect.Method;
 import java.util.List;
 
 class InternalSchemaInitializerTest {
     @Test
-    public void testGetModifyColumn() {
-        new MockUp<HMSExternalTable>() {
-            @Mock
-            public HMSExternalTable.DLAType getDlaType() {
-                return HMSExternalTable.DLAType.HUDI;
-            }
-        };
-
+    public void testGetModifyColumn() throws AnalysisException {
         InternalSchemaInitializer initializer = new 
InternalSchemaInitializer();
-        OlapTable table = new OlapTable();
+        OlapTable table = Mockito.mock(OlapTable.class);
         Column key1 = new Column("key1", ScalarType.createVarcharType(100), 
true, null, false, null, "");
         Column key2 = new Column("key2", ScalarType.createVarcharType(100), 
true, null, true, null, "");
         Column key3 = new Column("key3", ScalarType.createVarcharType(1024), 
true, null, null, "");
@@ -56,6 +48,7 @@ class InternalSchemaInitializerTest {
         Column key5 = new Column("key5", ScalarType.INT, true, null, null, "");
         Column value1 = new Column("value1", ScalarType.INT, false, null, 
null, "");
         Column value2 = new Column("value2", 
ScalarType.createVarcharType(100), false, null, null, "");
+        Column value3 = new Column("hot_value", 
ScalarType.createVarcharType(100), false, null, null, "");
         List<Column> schema = Lists.newArrayList();
         schema.add(key1);
         schema.add(key2);
@@ -64,15 +57,17 @@ class InternalSchemaInitializerTest {
         schema.add(key5);
         schema.add(value1);
         schema.add(value2);
-        table.fullSchema = schema;
+        schema.add(value3);
+        Mockito.when(table.getFullSchema()).thenReturn(schema);
+        Mockito.when(table.getBaseSchema()).thenReturn(schema);
         List<AlterClause> modifyColumnClauses = 
initializer.getModifyColumnClauses(table);
-        Assertions.assertEquals(2, modifyColumnClauses.size());
-        ModifyColumnClause clause1 = (ModifyColumnClause) 
modifyColumnClauses.get(0);
+        Assertions.assertEquals(16, modifyColumnClauses.size());
+        ModifyColumnClause clause1 = (ModifyColumnClause) 
modifyColumnClauses.get(14);
         Assertions.assertEquals("key1", clause1.getColumn().getName());
         Assertions.assertEquals(StatisticConstants.MAX_NAME_LEN, 
clause1.getColumn().getType().getLength());
         Assertions.assertFalse(clause1.getColumn().isAllowNull());
 
-        ModifyColumnClause clause2 = (ModifyColumnClause) 
modifyColumnClauses.get(1);
+        ModifyColumnClause clause2 = (ModifyColumnClause) 
modifyColumnClauses.get(15);
         Assertions.assertEquals("key2", clause2.getColumn().getName());
         Assertions.assertEquals(StatisticConstants.MAX_NAME_LEN, 
clause2.getColumn().getType().getLength());
         Assertions.assertTrue(clause2.getColumn().isAllowNull());
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
index 815cff0f2d9..d1d0e4ff70b 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
@@ -370,7 +370,7 @@ public class HyperGraphBuilder {
             slotIdToColumnStats.put(slot,
                     new ColumnStatistic(count, count, null, 1, 0, 0, 0,
                             count, null, null, true,
-                            new Date().toString()));
+                            new Date().toString(), null));
         }
         return new Statistics(count, slotIdToColumnStats);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
index 5b7b430a2cd..d3f739a18fb 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
@@ -238,7 +238,7 @@ public class AnalysisJobTest {
         job.flushBuffer();
         Assertions.assertEquals(0, job.queryFinished.size());
         Assertions.assertEquals(0, job.buf.size());
-        Assertions.assertEquals("d8c3a1696769a1862331eec30d2c1ecb", 
job.stmtExecutor.getContext().getSqlHash());
+        Assertions.assertEquals("a48a05f7d2f079b74481e2909a7fa79b", 
job.stmtExecutor.getContext().getSqlHash());
     }
 
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
index 11a271f9b40..343407dcaa6 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
@@ -61,7 +61,7 @@ public class BaseAnalysisTaskTest {
         Assertions.assertEquals("NULL", maxFunction);
 
         String ndvFunction = 
olapAnalysisTask.getNdvFunction(String.valueOf(100));
-        Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) 
/ (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not 
null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 
0)) * SUM(`t1`.`count`) / 100)", ndvFunction);
+        Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`col_value`) / 
(SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`col_value` is not null, 
1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`col_value` is not null, 1, 0)) * 
SUM(`t1`.`count`) / 100)", ndvFunction);
         System.out.println(ndvFunction);
     }
 
@@ -82,6 +82,7 @@ public class BaseAnalysisTaskTest {
         values.add("max");
         values.add("400");
         values.add("500");
+        values.add(null);
         ResultRow row = new ResultRow(values);
         List<ResultRow> result = Lists.newArrayList();
         result.add(row);
@@ -98,7 +99,7 @@ public class BaseAnalysisTaskTest {
         } catch (Exception e) {
             Assertions.assertEquals(e.getMessage(),
                     "ColStatsData is invalid, skip analyzing. "
-                            + 
"('id',10000,20000,30000,0,'col',null,100,1100,300,'min','max',400,'500')");
+                            + 
"('id',10000,20000,30000,0,'col',null,100,1100,300,'min','max',400,'500',NULL)");
             return;
         }
         Assertions.fail();
@@ -121,6 +122,7 @@ public class BaseAnalysisTaskTest {
         values.add("max");
         values.add("400");
         values.add("500");
+        values.add(null);
         ResultRow row = new ResultRow(values);
         List<ResultRow> result = Lists.newArrayList();
         result.add(row);
@@ -137,7 +139,7 @@ public class BaseAnalysisTaskTest {
         } catch (Exception e) {
             Assertions.assertEquals(e.getMessage(),
                     "ColStatsData is invalid, skip analyzing. "
-                            + 
"('id',10000,20000,30000,0,'col',null,500,0,300,'min','max',400,'500')");
+                            + 
"('id',10000,20000,30000,0,'col',null,500,0,300,'min','max',400,'500',NULL)");
             return;
         }
         Assertions.fail();
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index c031f27a917..e188df95f63 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -242,7 +242,7 @@ public class CacheTest extends TestWithFeService {
                 return Optional.of(new ColumnStatistic(1, 2,
                         null, 3, 4, 5, 6, 7,
                         null, null, false,
-                        new Date().toString()));
+                        new Date().toString(), null));
             }
         };
 
@@ -408,7 +408,7 @@ public class CacheTest extends TestWithFeService {
                 return Optional.of(new ColumnStatistic(1, 2,
                     null, 3, 4, 5, 6, 7,
                     null, null, false,
-                        new Date().toString()));
+                        new Date().toString(), null));
             }
         };
         columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, 
"col", connectContext);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColStatsDataTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColStatsDataTest.java
index b193edb2049..aa6301ed639 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColStatsDataTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColStatsDataTest.java
@@ -22,10 +22,11 @@ import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.collect.Lists;
-import mockit.Expectations;
-import mockit.Mocked;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentMatchers;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
 
 import java.util.List;
 
@@ -47,6 +48,7 @@ public class ColStatsDataTest {
         values.add("max");
         values.add("400");
         values.add("500");
+        values.add(null);
         ResultRow row = new ResultRow(values);
         ColStatsData data = new ColStatsData(row);
         Assertions.assertEquals("id", data.statsId.id);
@@ -55,7 +57,7 @@ public class ColStatsDataTest {
         Assertions.assertEquals(30000, data.statsId.tblId);
         Assertions.assertEquals(0, data.statsId.idxId);
         Assertions.assertEquals("col", data.statsId.colId);
-        Assertions.assertEquals(null, data.statsId.partId);
+        Assertions.assertNull(data.statsId.partId);
         Assertions.assertEquals(100, data.count);
         Assertions.assertEquals(200, data.ndv);
         Assertions.assertEquals(300, data.nullCount);
@@ -63,6 +65,7 @@ public class ColStatsDataTest {
         Assertions.assertEquals("max", data.maxLit);
         Assertions.assertEquals(400, data.dataSizeInBytes);
         Assertions.assertEquals("500", data.updateTime);
+        Assertions.assertNull(data.hotValues);
     }
 
     @Test
@@ -82,6 +85,7 @@ public class ColStatsDataTest {
         values.add(null);
         values.add(null);
         values.add(null);
+        values.add(null);
         ResultRow row = new ResultRow(values);
         ColStatsData data = new ColStatsData(row);
         Assertions.assertEquals("id", data.statsId.id);
@@ -90,79 +94,83 @@ public class ColStatsDataTest {
         Assertions.assertEquals(30000, data.statsId.tblId);
         Assertions.assertEquals(0, data.statsId.idxId);
         Assertions.assertEquals("col", data.statsId.colId);
-        Assertions.assertEquals(null, data.statsId.partId);
+        Assertions.assertNull(data.statsId.partId);
         Assertions.assertEquals(0, data.count);
         Assertions.assertEquals(0, data.ndv);
         Assertions.assertEquals(0, data.nullCount);
-        Assertions.assertEquals(null, data.minLit);
-        Assertions.assertEquals(null, data.maxLit);
+        Assertions.assertNull(data.minLit);
+        Assertions.assertNull(data.maxLit);
         Assertions.assertEquals(0, data.dataSizeInBytes);
-        Assertions.assertEquals(null, data.updateTime);
+        Assertions.assertNull(data.updateTime);
+        Assertions.assertNull(data.hotValues);
     }
 
     @Test
-    public void testToColumnStatisticUnknown(@Mocked StatisticsUtil 
mockedClass) {
+    public void testToColumnStatisticUnknown() {
         // Test column is null
-        new Expectations() {
-            {
-                mockedClass.findColumn(anyLong, anyLong, anyLong, anyLong, 
anyString);
-                result = null;
-            }
-        };
-        List<String> values = Lists.newArrayList();
-        values.add("id");
-        values.add("10000");
-        values.add("20000");
-        values.add("30000");
-        values.add("0");
-        values.add("col");
-        values.add(null);
-        values.add("100");
-        values.add("200");
-        values.add("300");
-        values.add("min");
-        values.add("max");
-        values.add("400");
-        values.add("500");
-        ResultRow row = new ResultRow(values);
-        ColStatsData data = new ColStatsData(row);
-        ColumnStatistic columnStatistic = data.toColumnStatistic();
-        Assertions.assertEquals(ColumnStatistic.UNKNOWN, columnStatistic);
+        try (MockedStatic<StatisticsUtil> mocked = 
Mockito.mockStatic(StatisticsUtil.class)) {
+            // 设置静态方法的行为
+            mocked.when(() -> 
StatisticsUtil.findColumn(ArgumentMatchers.anyLong(), 
ArgumentMatchers.anyLong(),
+                    ArgumentMatchers.anyLong(), ArgumentMatchers.anyLong(), 
ArgumentMatchers.anyString()))
+                    .thenReturn(null);
+
+            List<String> values = Lists.newArrayList();
+            values.add("id");
+            values.add("10000");
+            values.add("20000");
+            values.add("30000");
+            values.add("0");
+            values.add("col");
+            values.add(null);
+            values.add("100");
+            values.add("200");
+            values.add("300");
+            values.add("min");
+            values.add("max");
+            values.add("400");
+            values.add("500");
+            values.add(null);
+            ResultRow row = new ResultRow(values);
+            ColStatsData data = new ColStatsData(row);
+            ColumnStatistic columnStatistic = data.toColumnStatistic();
+            Assertions.assertEquals(ColumnStatistic.UNKNOWN, columnStatistic);
+        }
     }
 
     @Test
-    public void testToColumnStatisticNormal(@Mocked StatisticsUtil 
mockedClass) {
-        new Expectations() {
-            {
-                mockedClass.findColumn(anyLong, anyLong, anyLong, anyLong, 
anyString);
-                result = new Column("colName", PrimitiveType.STRING);
-            }
-        };
-        List<String> values = Lists.newArrayList();
-        values.add("id");
-        values.add("10000");
-        values.add("20000");
-        values.add("30000");
-        values.add("0");
-        values.add("col");
-        values.add(null);
-        values.add("100");
-        values.add("200");
-        values.add("300");
-        values.add("null");
-        values.add("null");
-        values.add("400");
-        values.add("500");
-        ResultRow row = new ResultRow(values);
-        ColStatsData data = new ColStatsData(row);
-        ColumnStatistic columnStatistic = data.toColumnStatistic();
-        Assertions.assertEquals(100, columnStatistic.count);
-        Assertions.assertEquals(200, columnStatistic.ndv);
-        Assertions.assertEquals(300, columnStatistic.numNulls);
-        Assertions.assertEquals(Double.NEGATIVE_INFINITY, 
columnStatistic.minValue);
-        Assertions.assertEquals(Double.POSITIVE_INFINITY, 
columnStatistic.maxValue);
-        Assertions.assertEquals(400, columnStatistic.dataSize);
-        Assertions.assertEquals("500", columnStatistic.updatedTime);
+    public void testToColumnStatisticNormal() {
+        try (MockedStatic<StatisticsUtil> mocked = 
Mockito.mockStatic(StatisticsUtil.class)) {
+            // 设置静态方法的行为
+            mocked.when(() -> 
StatisticsUtil.findColumn(ArgumentMatchers.anyLong(), 
ArgumentMatchers.anyLong(),
+                            ArgumentMatchers.anyLong(), 
ArgumentMatchers.anyLong(), ArgumentMatchers.anyString()))
+                    .thenReturn(new Column("colName", PrimitiveType.STRING));
+            List<String> values = Lists.newArrayList();
+            values.add("id");
+            values.add("10000");
+            values.add("20000");
+            values.add("30000");
+            values.add("0");
+            values.add("col");
+            values.add(null);
+            values.add("100");
+            values.add("200");
+            values.add("300");
+            values.add("null");
+            values.add("null");
+            values.add("400");
+            values.add("500");
+            values.add(null);
+            ResultRow row = new ResultRow(values);
+            ColStatsData data = new ColStatsData(row);
+            ColumnStatistic columnStatistic = data.toColumnStatistic();
+            Assertions.assertEquals(100, columnStatistic.count);
+            Assertions.assertEquals(200, columnStatistic.ndv);
+            Assertions.assertEquals(300, columnStatistic.numNulls);
+            Assertions.assertEquals(Double.NEGATIVE_INFINITY, 
columnStatistic.minValue);
+            Assertions.assertEquals(Double.POSITIVE_INFINITY, 
columnStatistic.maxValue);
+            Assertions.assertEquals(400, columnStatistic.dataSize);
+            Assertions.assertEquals("500", columnStatistic.updatedTime);
+        }
     }
 
     @Test
@@ -193,6 +201,7 @@ public class ColStatsDataTest {
         values.add("max");
         values.add("400");
         values.add("500");
+        values.add(null);
         ResultRow row = new ResultRow(values);
         ColStatsData data = new ColStatsData(row);
         Assertions.assertFalse(data.isValid());
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 3a9cf558cc5..e870b0b3bfd 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -201,21 +201,15 @@ public class HMSAnalysisTaskTest {
         new MockUp<HMSAnalysisTask>() {
             @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-', 
'hour') AS `id`,"
-                        + "          10001 AS `catalog_id`,"
-                        + "          20001 AS `db_id`,"
-                        + "          30001 AS `tbl_id`,"
-                        + "          -1 AS `idx_id`,"
-                        + "          'hour' AS `col_id`,"
-                        + "          NULL AS `part_id`,"
-                        + "          COUNT(1) AS `row_count`,"
-                        + "          NDV(`hour`) AS `ndv`,"
-                        + "          COUNT(1) - COUNT(`hour`) AS `null_count`,"
-                        + "          SUBSTRING(CAST(MIN(`hour`) AS STRING), 1, 
1024) AS `min`,"
-                        + "          SUBSTRING(CAST(MAX(`hour`) AS STRING), 1, 
1024) AS `max`,"
-                        + "          COUNT(1) * 4 AS `data_size`,"
-                        + "          NOW() AS `update_time`"
-                        + "  FROM `hms`.`default`.`test` ", sql);
+                Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-', 
'hour') AS `id`, "
+                        + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, "
+                        + "-1 AS `idx_id`, 'hour' AS `col_id`, NULL AS 
`part_id`, "
+                        + "COUNT(1) AS `row_count`, NDV(`hour`) AS `ndv`, "
+                        + "COUNT(1) - COUNT(`hour`) AS `null_count`, "
+                        + "SUBSTRING(CAST(MIN(`hour`) AS STRING), 1, 1024) AS 
`min`, "
+                        + "SUBSTRING(CAST(MAX(`hour`) AS STRING), 1, 1024) AS 
`max`, "
+                        + "COUNT(1) * 4 AS `data_size`, NOW() AS 
`update_time`, "
+                        + "null as `hot_value` FROM `hms`.`default`.`test` ", 
sql);
             }
         };
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 7503ff8d202..08bc2ff9111 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -131,23 +131,29 @@ public class OlapAnalysisTaskTest {
             }
 
             @Mock
-            protected void getSampleParams(Map<String, String> params, long 
tableRowCount) {}
+            void getSampleParams(Map<String, String> params, long 
tableRowCount) {}
 
             @Mock
-            protected boolean useLinearAnalyzeTemplate() {
+            boolean useLinearAnalyzeTemplate() {
                 return true;
             }
 
                 @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-', 
'null') AS `id`, "
+                Assertions.assertEquals("WITH cte1 AS (SELECT `null` FROM 
`catalogName`.`${dbName}`.`null`  "
+                        + "${sampleHints} ${limit} ), cte2 AS (SELECT 
CONCAT(30001, '-', -1, '-', 'null') AS `id`, "
                         + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, "
-                        + "'null' AS `col_id`, NULL AS `part_id`, ${rowCount} 
AS `row_count`, "
-                        + "${ndvFunction} as `ndv`, ROUND(SUM(CASE WHEN `null` 
IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, "
-                        + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, 
SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
-                        + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() 
FROM "
-                        + "( SELECT * FROM `catalogName`.`${dbName}`.`null`  
${sampleHints} ${limit}) as t ", sql);
-                return;
+                        + "'null' AS `col_id`, NULL AS `part_id`, ${rowCount} 
AS `row_count`, ${ndvFunction} as `ndv`, "
+                        + "ROUND(SUM(CASE WHEN `null` IS NULL THEN 1 ELSE 0 
END) * ${scaleFactor}) AS `null_count`, "
+                        + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
+                        + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+                        + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() 
FROM cte1), "
+                        + "cte3 AS (SELECT 
GROUP_CONCAT(CONCAT(REPLACE(REPLACE(t.`column_key`, "
+                        + "\":\", \"\\\\:\"), \";\", \"\\\\;\"), \" :\", 
ROUND(t.`count` * 100.0 / ${rowCount2}, 2)), "
+                        + "\" ;\") as `hot_value` FROM (SELECT 
${subStringColName} as `hash_value`, "
+                        + "MAX(`null`) as `column_key`, COUNT(1) AS `count` 
FROM cte1 "
+                        + "WHERE `null` IS NOT NULL GROUP BY `hash_value` 
ORDER BY `count` DESC LIMIT 3) t) "
+                        + "SELECT * FROM cte2 CROSS JOIN cte3", sql);
             }
         };
 
@@ -166,21 +172,24 @@ public class OlapAnalysisTaskTest {
         new MockUp<OlapAnalysisTask>() {
             @Mock
             public void runQuery(String sql) {
-                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, "
-                        + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, "
-                        + "'null' AS `col_id`, NULL AS `part_id`, ${rowCount} 
AS `row_count`, ${ndvFunction} as `ndv`, "
-                        + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, "
-                        + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, 
SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
-                        + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() 
"
-                        + "FROM (     SELECT t0.`colValue` as `column_key`, 
COUNT(1) as `count`, SUM(`len`) as `column_length`     "
-                        + "FROM         (SELECT ${subStringColName} AS 
`colValue`, LENGTH(`null`) as `len`         "
-                        + "FROM `catalogName`.`${dbName}`.`null`  
${sampleHints} ${limit}) as `t0`        "
-                        + "    GROUP BY `t0`.`colValue` ) as `t1` ", sql);
-                return;
-            }
-
-            @Mock
-            protected boolean useLinearAnalyzeTemplate() {
+                Assertions.assertEquals("WITH cte1 AS (SELECT 
MAX(t0.`col_value`) as `col_value`, COUNT(1) as `count`,"
+                        + " SUM(`len`) as `column_length` FROM (SELECT 
${subStringColName} AS `hash_value`, "
+                        + "`null` AS `col_value`, LENGTH(`null`) as `len` FROM 
`catalogName`.`${dbName}`.`null`  "
+                        + "${sampleHints} ${limit}) as `t0`  GROUP BY 
`t0`.`hash_value`), "
+                        + "cte2 AS ( SELECT CONCAT('30001', '-', '-1', '-', 
'null') AS `id`, 10001 AS `catalog_id`, "
+                        + "20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS 
`idx_id`, 'null' AS `col_id`, NULL AS `part_id`, "
+                        + "${rowCount} AS `row_count`, ${ndvFunction} as 
`ndv`, IFNULL(SUM(IF(`t1`.`col_value` "
+                        + "IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as 
`null_count`, SUBSTRING(CAST('1' "
+                        + "AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2' 
AS STRING), 1, 1024) AS `max`, "
+                        + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() 
FROM cte1 t1), cte3 AS (SELECT "
+                        + "GROUP_CONCAT(CONCAT(REPLACE(REPLACE(t2.`col_value`, 
\":\", \"\\\\:\"), \";\", \"\\\\;\"), "
+                        + "\" :\", ROUND(t2.`count` * 100.0 / ${rowCount2}, 
2)), \" ;\") as `hot_value` FROM (SELECT "
+                        + "`col_value`, `count` FROM cte1 WHERE `col_value` IS 
NOT NULL ORDER BY `count` DESC LIMIT 3) "
+                        + "t2) SELECT * FROM cte2 CROSS JOIN cte3", sql);
+            }
+
+            @Mock
+            boolean useLinearAnalyzeTemplate() {
                 return false;
             }
         };
@@ -377,7 +386,7 @@ public class OlapAnalysisTaskTest {
         task.getSampleParams(params, 1000);
         Assertions.assertEquals("10.0", params.get("scaleFactor"));
         Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));
-        Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) 
/ (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not 
null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 
0)) * SUM(`t1`.`count`) / 1000)", params.get("ndvFunction"));
+        Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`col_value`) / 
(SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`col_value` is not null, 
1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`col_value` is not null, 1, 0)) * 
SUM(`t1`.`count`) / 1000)", params.get("ndvFunction"));
         Assertions.assertEquals("SUM(t1.count) * 4", 
params.get("dataSizeFunction"));
         Assertions.assertEquals("`${colName}`", 
params.get("subStringColName"));
         Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint"));
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
index 45c78f665b0..3de70a11a52 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
@@ -42,6 +42,7 @@ public class StatsMockUtil {
                 // 11
                 add("11");
                 add("12");
+                add(null);
                 add(String.valueOf(System.currentTimeMillis()));
             }};
         return new ResultRow(vals);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
index 0467f9b89dd..15a0c65e49c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.statistics.util;
 
+import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.MaterializedIndexMeta;
@@ -558,4 +559,47 @@ class StatisticsUtilTest {
         Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, 
true, 1));
 
     }
+
+    @Test
+    void testGetHotValues() {
+        String value1 = "1234 :0.33 ;222 :0.22";
+        Map<LiteralExpr, Float> hotValues = 
StatisticsUtil.getHotValues(value1, Type.INT);
+        Assertions.assertEquals(2, hotValues.size());
+
+        int i = 0;
+        for (Map.Entry<LiteralExpr, Float> entry : hotValues.entrySet()) {
+            if (i == 0) {
+                Assertions.assertEquals(1234, entry.getKey().getLongValue());
+                Assertions.assertEquals("0.33", entry.getValue().toString());
+                i++;
+            } else {
+                Assertions.assertEquals(222, entry.getKey().getLongValue());
+                Assertions.assertEquals("0.22", entry.getValue().toString());
+            }
+        }
+
+        String value2 = "1234 :0.33";
+        hotValues = StatisticsUtil.getHotValues(value2, Type.INT);
+        Assertions.assertEquals(1, hotValues.size());
+
+        for (Map.Entry<LiteralExpr, Float> entry : hotValues.entrySet()) {
+            Assertions.assertEquals(1234, entry.getKey().getLongValue());
+            Assertions.assertEquals("0.33", entry.getValue().toString());
+        }
+
+        String value3 = "aabbcc\\:\\; :0.33 ; dd :0.22";
+        hotValues = StatisticsUtil.getHotValues(value3, Type.STRING);
+        Assertions.assertEquals(2, hotValues.size());
+        i = 0;
+        for (Map.Entry<LiteralExpr, Float> entry : hotValues.entrySet()) {
+            if (i == 0) {
+                Assertions.assertEquals("aabbcc:;", 
entry.getKey().getStringValue());
+                Assertions.assertEquals("0.33", entry.getValue().toString());
+                i++;
+            } else {
+                Assertions.assertEquals(" dd", 
entry.getKey().getStringValue());
+                Assertions.assertEquals("0.22", entry.getValue().toString());
+            }
+        }
+    }
 }
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
index 971e6bc0253..03a84e45001 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy
@@ -293,7 +293,7 @@ suite("test_hive_statistic", 
"p0,external,hive,external_docker,external_docker_h
         def data_size_in_bytes = result[0][12]
         def update_time = result[0][13]
 
-        sql """insert into internal.__internal_schema.column_statistics values 
('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, 
$ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
+        sql """insert into internal.__internal_schema.column_statistics values 
('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, 
$ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time', null)"""
 
         result = sql """show column stats logs1_parquet (log_time)"""
         assertEquals(result.size(), 1)
diff --git a/regression-test/suites/statistics/test_ddl.groovy 
b/regression-test/suites/statistics/test_ddl.groovy
index be2c82bd4ba..a5fa9203cbf 100644
--- a/regression-test/suites/statistics/test_ddl.groovy
+++ b/regression-test/suites/statistics/test_ddl.groovy
@@ -17,7 +17,7 @@
 
 suite("test_ddl") {
     sql """
-        INSERT INTO `__internal_schema`.`column_statistics` VALUES 
('10143--1-cd_credit_rating','0','10141','10143','-1','cd_credit_rating',NULL,1920800,4,0,'Good','Unknown',0,'2023-06-15
 
19:50:43'),('10143--1-cd_demo_sk-10142','0','10141','10143','-1','cd_demo_sk','10142',1920800,1916366,0,'1','1920800',0,'2023-06-15
 
19:50:42'),('10143--1-cd_dep_employed_count','0','10141','10143','-1','cd_dep_employed_count',NULL,1920800,7,0,'0','6',0,'2023-06-15
 19:50:43'),('10143--1-cd_dep_employed_c [...]
+       INSERT INTO `__internal_schema`.`column_statistics` VALUES 
('10143--1-cd_credit_rating','0','10141','10143','-1','cd_credit_rating',NULL,1920800,4,0,'Good','Unknown',0,'2023-06-15
 
19:50:43',null),('10143--1-cd_demo_sk-10142','0','10141','10143','-1','cd_demo_sk','10142',1920800,1916366,0,'1','1920800',0,'2023-06-15
 
19:50:42',null),('10143--1-cd_dep_employed_count','0','10141','10143','-1','cd_dep_employed_count',NULL,1920800,7,0,'0','6',0,'2023-06-15
 19:50:43',null),('10143--1-cd_ [...]
     """
 
     sql """
diff --git a/regression-test/suites/statistics/test_hot_value.groovy 
b/regression-test/suites/statistics/test_hot_value.groovy
new file mode 100644
index 00000000000..39f59524511
--- /dev/null
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hot_value") {
+
+    def wait_row_count_reported = { db, table, row, column, expected ->
+        def result = sql """show frontends;"""
+        logger.info("show frontends result origin: " + result)
+        def host
+        def port
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][8] == "true") {
+                host = result[i][1]
+                port = result[i][4]
+            }
+        }
+        def tokens = context.config.jdbcUrl.split('/')
+        def url=tokens[0] + "//" + host + ":" + port
+        logger.info("Master url is " + url)
+        connect(context.config.jdbcUser, context.config.jdbcPassword, url) {
+            sql """use ${db}"""
+            result = sql """show frontends;"""
+            logger.info("show frontends result master: " + result)
+            for (int i = 0; i < 120; i++) {
+                Thread.sleep(5000)
+                result = sql """SHOW DATA FROM ${table};"""
+                logger.info("result " + result)
+                if (result[row][column] == expected) {
+                    return;
+                }
+            }
+            throw new Exception("Row count report timeout.")
+        }
+
+    }
+
+    sql """drop database if exists test_hot_value"""
+    sql """create database test_hot_value"""
+    sql """use test_hot_value"""
+    sql """set global enable_auto_analyze=false"""
+
+    sql """CREATE TABLE test1 (
+            key1 int NULL,
+            value1 varchar(25) NULL
+        )ENGINE=OLAP
+        DUPLICATE KEY(`key1`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY HASH(`key1`) BUCKETS 2
+        PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+    sql """insert into test1 select number, number % 2 from 
numbers("number"="10000")"""
+    sql """CREATE TABLE test2 (
+            key1 int NULL,
+            value1 varchar(25) NULL
+        )ENGINE=OLAP
+        DUPLICATE KEY(`key1`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY HASH(`key1`) BUCKETS 2
+        PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+    sql """insert into test2 select number, " : ;a" from 
numbers("number"="10000")"""
+    wait_row_count_reported("test_hot_value", "test1", 0, 4, "10000")
+    wait_row_count_reported("test_hot_value", "test2", 0, 4, "10000")
+    sql """analyze table test1 with sync"""
+    def result = sql """show column stats test1(key1)"""
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    assertEquals("null", result[0][17])
+    result = sql """show column stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    assertEquals("null", result[0][17])
+    result = sql """show column cached stats test1(key1)"""
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    assertEquals("null", result[0][17])
+    result = sql """show column cached stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    assertEquals("null", result[0][17])
+
+    sql """drop stats test1"""
+    sql """analyze table test1 with sample rows 400 with sync"""
+    result = sql """show column stats test1(key1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    result = sql """show column stats test1(value1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    String[] hotValues = result[0][17].split(";")
+    assertEquals(2, hotValues.length)
+    assertTrue(hotValues[0] == "1:50.0" || hotValues[0] == "0:50.0")
+    assertTrue(hotValues[1] == "1:50.0" || hotValues[1] == "0:50.0")
+    result = sql """show column cached stats test1(value1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    hotValues = result[0][17].split(";")
+    assertEquals(2, hotValues.length)
+    assertTrue(hotValues[0] == "1:50.0" || hotValues[0] == "0:50.0")
+    assertTrue(hotValues[1] == "1:50.0" || hotValues[1] == "0:50.0")
+
+    sql """drop stats test1"""
+    sql """analyze table test1 with sample rows 40000 with sync"""
+    result = sql """show column stats test1(key1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    result = sql """show column stats test1(value1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    hotValues = result[0][17].split(";")
+    assertEquals(2, hotValues.length)
+    assertTrue(hotValues[0] == "1:50.0" || hotValues[0] == "0:50.0")
+    assertTrue(hotValues[1] == "1:50.0" || hotValues[1] == "0:50.0")
+    result = sql """show column cached stats test1(value1)"""
+    logger.info("result " + result)
+    assertEquals(1, result.size())
+    assertEquals("10000.0", result[0][2])
+    hotValues = result[0][17].split(";")
+    assertEquals(2, hotValues.length)
+    assertTrue(hotValues[0] == "1:50.0" || hotValues[0] == "0:50.0")
+    assertTrue(hotValues[1] == "1:50.0" || hotValues[1] == "0:50.0")
+
+    sql """alter table test1 modify column value1 set stats 
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 
'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='aaa :22.33');"""
+    result = sql """show column stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("5.0", result[0][2])
+    assertEquals("aaa:22.33", result[0][17])
+    result = sql """show column cached stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("5.0", result[0][2])
+    assertEquals("aaa:22.33", result[0][17])
+
+    sql """alter table test1 modify column value1 set stats 
('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 
'min_value'='AFRICA', 'max_value'='MIDDLE EAST', 'hot_values'='a \\\\;a \\\\:a 
:22.33');"""
+    result = sql """show column stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("5.0", result[0][2])
+    assertEquals("a ;a :a:22.33", result[0][17])
+    result = sql """show column cached stats test1(value1)"""
+    assertEquals(1, result.size())
+    assertEquals("5.0", result[0][2])
+    assertEquals("a ;a :a:22.33", result[0][17])
+
+    sql """analyze table test2 with sample rows 100 with sync"""
+    result = sql """show column stats test2(value1)"""
+    assertEquals(1, result.size())
+    assertEquals(" : ;a:100.0", result[0][17])
+    result = sql """show column cached stats test2(value1)"""
+    assertEquals(1, result.size())
+    assertEquals(" : ;a:100.0", result[0][17])
+
+    sql """drop stats test2"""
+    sql """analyze table test2 with sample rows 4000000 with sync"""
+    result = sql """show column stats test2(value1)"""
+    assertEquals(1, result.size())
+    assertEquals(" : ;a:100.0", result[0][17])
+    result = sql """show column cached stats test2(value1)"""
+    assertEquals(1, result.size())
+    assertEquals(" : ;a:100.0", result[0][17])
+
+    sql """drop database if exists test_hot_value"""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to