This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 93c0d05116e [improvement](statistics)Analyze empty table. #28077 
(#28397)
93c0d05116e is described below

commit 93c0d05116ed7ebed90f303405aa47b3a218a691
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Dec 18 14:40:32 2023 +0800

    [improvement](statistics)Analyze empty table. #28077 (#28397)
    
    Analyze a table even when it's empty. The result should be like this:
    
    mysql> show column stats nation;
    
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
    | column_name | count | ndv  | num_null | data_size | avg_size_byte | min  
| max  | method | type         | trigger | query_times | updated_time        |
    
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
    | n_comment   | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  
| N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
    | n_nationkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  
| N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
    | n_regionkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  
| N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
    | n_name      | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  
| N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
    
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+----
---
 .../java/org/apache/doris/catalog/OlapTable.java   |   6 +-
 .../org/apache/doris/statistics/ColStatsData.java  |  13 ++
 .../apache/doris/statistics/OlapAnalysisTask.java  |  17 +-
 .../java/org/apache/doris/statistics/StatsId.java  |  10 +
 .../doris/statistics/util/StatisticsUtil.java      |   4 +
 .../suites/statistics/analyze_stats.groovy         | 237 +++++++++++++++++----
 6 files changed, 238 insertions(+), 49 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 7142723de31..4fb4aa7c7f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1127,11 +1127,6 @@ public class OlapTable extends Table {
         if (tblStats == null) {
             return true;
         }
-        long rowCount = getRowCount();
-        // TODO: Do we need to analyze an empty table?
-        if (rowCount == 0) {
-            return false;
-        }
         if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
                 .stream()
                 .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
@@ -1139,6 +1134,7 @@ public class OlapTable extends Table {
                 .collect(Collectors.toSet()))) {
             return true;
         }
+        long rowCount = getRowCount();
         long updateRows = tblStats.updatedRows.get();
         int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
         return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 7878a065488..460475198eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -21,6 +21,8 @@ import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.annotations.VisibleForTesting;
 
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
 import java.util.StringJoiner;
 
 /**
@@ -66,6 +68,17 @@ public class ColStatsData {
         updateTime = null;
     }
 
+    public ColStatsData(StatsId statsId) {
+        this.statsId = statsId;
+        count = 0;
+        ndv = 0;
+        nullCount = 0;
+        minLit = null;
+        maxLit = null;
+        dataSizeInBytes = 0;
+        updateTime = 
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+    }
+
     public ColStatsData(ResultRow row) {
         this.statsId = new StatsId(row);
         this.count = (long) Double.parseDouble(row.get(7));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 06c181b78ef..396e25b5a16 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -33,6 +33,7 @@ import org.apache.commons.text.StringSubstitutor;
 
 import java.security.SecureRandom;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -61,9 +62,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
     public void doExecute() throws Exception {
         Set<String> partitionNames = info.colToPartitions.get(info.colName);
         if (partitionNames.isEmpty()) {
-            LOG.debug("Skip empty empty partition task for column {} in 
{}.{}.{}",
-                    info.catalogId, info.dbId, info.tblId, info.colName);
-            job.appendBuf(this, Collections.emptyList());
+            StatsId statsId = new StatsId(concatColumnStatsId(), 
info.catalogId, info.dbId,
+                    info.tblId, info.indexId, info.colName, null);
+            job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
             return;
         }
         if (tableSample != null) {
@@ -308,4 +309,14 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             && keysNum == 1
             && (keysType.equals(KeysType.UNIQUE_KEYS) || 
keysType.equals(KeysType.AGG_KEYS));
     }
+
+    protected String concatColumnStatsId() {
+        StringBuilder stringBuilder = new StringBuilder();
+        stringBuilder.append(info.tblId);
+        stringBuilder.append("-");
+        stringBuilder.append(info.indexId);
+        stringBuilder.append("-");
+        stringBuilder.append(info.colName);
+        return stringBuilder.toString();
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
index 9a897f4e70e..21395638cd6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
@@ -57,6 +57,16 @@ public class StatsId {
         this.partId = row.get(6);
     }
 
+    public StatsId(String id, long catalogId, long dbId, long tblId, long 
idxId, String colId, String partId) {
+        this.id = id;
+        this.catalogId = catalogId;
+        this.dbId = dbId;
+        this.tblId = tblId;
+        this.idxId = idxId;
+        this.colId = colId;
+        this.partId = partId;
+    }
+
     public String toSQL() {
         StringJoiner sj = new StringJoiner(",");
         sj.add(StatisticsUtil.quote(StatisticsUtil.escapeSQL(id)));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index d6f61aa9ebc..a9429a35578 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -525,6 +525,10 @@ public class StatisticsUtil {
      * @return Health, the value range is [0, 100], the larger the value, the 
healthier the statistics of the table.
      */
     public static int getTableHealth(long totalRows, long updatedRows) {
+        // Avoid analyze empty table every time.
+        if (totalRows == 0 && updatedRows == 0) {
+            return 100;
+        }
         if (updatedRows >= totalRows) {
             return 0;
         } else {
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index 61f18c8060c..73d3263487d 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -1305,45 +1305,200 @@ PARTITION `p599` VALUES IN (599)
     assert 
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
 [...]
     assert 
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
 [...]
 
-
-   // Test trigger type.
-   sql """DROP DATABASE IF EXISTS trigger"""
-   sql """CREATE DATABASE IF NOT EXISTS trigger"""
-   sql """USE trigger"""
-   sql """
-     CREATE TABLE if not exists trigger_test(
-      `id`      int NOT NULL,
-      `name`    VARCHAR(152)
-     )ENGINE=OLAP
-     DUPLICATE KEY(`id`)
-     COMMENT "OLAP"
-     DISTRIBUTED BY HASH(`id`) BUCKETS 1
-     PROPERTIES (
-      "replication_num" = "1"
-     );
-   """
-   sql """insert into trigger_test values(1,'name1') """
-   sql """analyze database trigger PROPERTIES("use.auto.analyzer"="true")"""
-
-   int i = 0;
-   for (0; i < 10; i++) {
-       def result = sql """show column stats trigger_test"""
-       if (result.size <= 0) {
-          Thread.sleep(1000)
-           continue;
-       }
-       assertEquals(result.size(), 2)
-       assertEquals(result[0][10], "SYSTEM")
-       assertEquals(result[1][10], "SYSTEM")
-       break
-   }
-   if (i < 10) {
-       sql """analyze table trigger_test with sync"""
-       def result = sql """show column stats trigger_test"""
-       assertEquals(result.size(), 2)
-       assertEquals(result[0][10], "MANUAL")
-       assertEquals(result[1][10], "MANUAL")
-   }
-   sql """DROP DATABASE IF EXISTS trigger"""
-
+    // Test analyze empty table
+    sql """TRUNCATE TABLE ${tbl}"""
+    sql """ANALYZE TABLE ${tbl} WITH SYNC"""
+    result_after_truncate = sql """show column stats ${tbl}"""
+    assertEquals(14, result_after_truncate.size())
+    result = sql """show column stats ${tbl}(analyzetestlimitedk0);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk0", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk1);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk1", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk2);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk2", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk3);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk3", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk4);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk4", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk5);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk5", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk6);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk6", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk7);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk7", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk8);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk8", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk9);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk9", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk10);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk10", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk11);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk11", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk12);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk12", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    result = sql """show column stats ${tbl}(analyzetestlimitedk13);"""
+    assertEquals(1, result.size())
+    assertEquals("analyzetestlimitedk13", result[0][0])
+    assertEquals("0.0", result[0][1])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("N/A", result[0][6])
+    assertEquals("N/A", result[0][7])
+
+    // Test trigger type.
+    sql """DROP DATABASE IF EXISTS trigger"""
+    sql """CREATE DATABASE IF NOT EXISTS trigger"""
+    sql """USE trigger"""
+    sql """
+      CREATE TABLE if not exists trigger_test(
+       `id`      int NOT NULL,
+       `name`    VARCHAR(152)
+      )ENGINE=OLAP
+      DUPLICATE KEY(`id`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY HASH(`id`) BUCKETS 1
+      PROPERTIES (
+       "replication_num" = "1"
+      );
+    """
+    sql """insert into trigger_test values(1,'name1') """
+    sql """analyze database trigger PROPERTIES("use.auto.analyzer"="true")"""
+
+    int i = 0;
+    for (0; i < 10; i++) {
+        def result = sql """show column stats trigger_test"""
+        if (result.size() != 2) {
+           Thread.sleep(1000)
+            continue;
+        }
+        assertEquals(result[0][10], "SYSTEM")
+        assertEquals(result[1][10], "SYSTEM")
+        break
+    }
+    if (i < 10) {
+        sql """analyze table trigger_test with sync"""
+        def result = sql """show column stats trigger_test"""
+        assertEquals(result.size(), 2)
+        assertEquals(result[0][10], "MANUAL")
+        assertEquals(result[1][10], "MANUAL")
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to