This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 93c0d05116e [improvement](statistics)Analyze empty table. #28077
(#28397)
93c0d05116e is described below
commit 93c0d05116ed7ebed90f303405aa47b3a218a691
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Dec 18 14:40:32 2023 +0800
[improvement](statistics)Analyze empty table. #28077 (#28397)
Analyze a table even when it's empty. The result should be like this:
mysql> show column stats nation;
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| column_name | count | ndv | num_null | data_size | avg_size_byte | min
| max | method | type | trigger | query_times | updated_time |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| n_comment | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A
| N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 |
| n_nationkey | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A
| N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 |
| n_regionkey | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A
| N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 |
| n_name | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A
| N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+----
---
.../java/org/apache/doris/catalog/OlapTable.java | 6 +-
.../org/apache/doris/statistics/ColStatsData.java | 13 ++
.../apache/doris/statistics/OlapAnalysisTask.java | 17 +-
.../java/org/apache/doris/statistics/StatsId.java | 10 +
.../doris/statistics/util/StatisticsUtil.java | 4 +
.../suites/statistics/analyze_stats.groovy | 237 +++++++++++++++++----
6 files changed, 238 insertions(+), 49 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 7142723de31..4fb4aa7c7f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1127,11 +1127,6 @@ public class OlapTable extends Table {
if (tblStats == null) {
return true;
}
- long rowCount = getRowCount();
- // TODO: Do we need to analyze an empty table?
- if (rowCount == 0) {
- return false;
- }
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
@@ -1139,6 +1134,7 @@ public class OlapTable extends Table {
.collect(Collectors.toSet()))) {
return true;
}
+ long rowCount = getRowCount();
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 7878a065488..460475198eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -21,6 +21,8 @@ import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
import java.util.StringJoiner;
/**
@@ -66,6 +68,17 @@ public class ColStatsData {
updateTime = null;
}
+ public ColStatsData(StatsId statsId) {
+ this.statsId = statsId;
+ count = 0;
+ ndv = 0;
+ nullCount = 0;
+ minLit = null;
+ maxLit = null;
+ dataSizeInBytes = 0;
+ updateTime =
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+ }
+
public ColStatsData(ResultRow row) {
this.statsId = new StatsId(row);
this.count = (long) Double.parseDouble(row.get(7));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 06c181b78ef..396e25b5a16 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -33,6 +33,7 @@ import org.apache.commons.text.StringSubstitutor;
import java.security.SecureRandom;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -61,9 +62,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
if (partitionNames.isEmpty()) {
- LOG.debug("Skip empty empty partition task for column {} in
{}.{}.{}",
- info.catalogId, info.dbId, info.tblId, info.colName);
- job.appendBuf(this, Collections.emptyList());
+ StatsId statsId = new StatsId(concatColumnStatsId(),
info.catalogId, info.dbId,
+ info.tblId, info.indexId, info.colName, null);
+ job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
return;
}
if (tableSample != null) {
@@ -308,4 +309,14 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
&& keysNum == 1
&& (keysType.equals(KeysType.UNIQUE_KEYS) ||
keysType.equals(KeysType.AGG_KEYS));
}
+
+ protected String concatColumnStatsId() {
+ StringBuilder stringBuilder = new StringBuilder();
+ stringBuilder.append(info.tblId);
+ stringBuilder.append("-");
+ stringBuilder.append(info.indexId);
+ stringBuilder.append("-");
+ stringBuilder.append(info.colName);
+ return stringBuilder.toString();
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
index 9a897f4e70e..21395638cd6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
@@ -57,6 +57,16 @@ public class StatsId {
this.partId = row.get(6);
}
+ public StatsId(String id, long catalogId, long dbId, long tblId, long
idxId, String colId, String partId) {
+ this.id = id;
+ this.catalogId = catalogId;
+ this.dbId = dbId;
+ this.tblId = tblId;
+ this.idxId = idxId;
+ this.colId = colId;
+ this.partId = partId;
+ }
+
public String toSQL() {
StringJoiner sj = new StringJoiner(",");
sj.add(StatisticsUtil.quote(StatisticsUtil.escapeSQL(id)));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index d6f61aa9ebc..a9429a35578 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -525,6 +525,10 @@ public class StatisticsUtil {
* @return Health, the value range is [0, 100], the larger the value, the
healthier the statistics of the table.
*/
public static int getTableHealth(long totalRows, long updatedRows) {
+ // Avoid analyze empty table every time.
+ if (totalRows == 0 && updatedRows == 0) {
+ return 100;
+ }
if (updatedRows >= totalRows) {
return 0;
} else {
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index 61f18c8060c..73d3263487d 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -1305,45 +1305,200 @@ PARTITION `p599` VALUES IN (599)
assert
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
[...]
assert
"111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
[...]
-
- // Test trigger type.
- sql """DROP DATABASE IF EXISTS trigger"""
- sql """CREATE DATABASE IF NOT EXISTS trigger"""
- sql """USE trigger"""
- sql """
- CREATE TABLE if not exists trigger_test(
- `id` int NOT NULL,
- `name` VARCHAR(152)
- )ENGINE=OLAP
- DUPLICATE KEY(`id`)
- COMMENT "OLAP"
- DISTRIBUTED BY HASH(`id`) BUCKETS 1
- PROPERTIES (
- "replication_num" = "1"
- );
- """
- sql """insert into trigger_test values(1,'name1') """
- sql """analyze database trigger PROPERTIES("use.auto.analyzer"="true")"""
-
- int i = 0;
- for (0; i < 10; i++) {
- def result = sql """show column stats trigger_test"""
- if (result.size <= 0) {
- Thread.sleep(1000)
- continue;
- }
- assertEquals(result.size(), 2)
- assertEquals(result[0][10], "SYSTEM")
- assertEquals(result[1][10], "SYSTEM")
- break
- }
- if (i < 10) {
- sql """analyze table trigger_test with sync"""
- def result = sql """show column stats trigger_test"""
- assertEquals(result.size(), 2)
- assertEquals(result[0][10], "MANUAL")
- assertEquals(result[1][10], "MANUAL")
- }
- sql """DROP DATABASE IF EXISTS trigger"""
-
+ // Test analyze empty table
+ sql """TRUNCATE TABLE ${tbl}"""
+ sql """ANALYZE TABLE ${tbl} WITH SYNC"""
+ result_after_truncate = sql """show column stats ${tbl}"""
+ assertEquals(14, result_after_truncate.size())
+ result = sql """show column stats ${tbl}(analyzetestlimitedk0);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk0", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk1);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk1", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk2);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk2", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk3);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk3", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk4);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk4", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk5);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk5", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk6);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk6", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk7);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk7", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk8);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk8", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk9);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk9", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk10);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk10", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk11);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk11", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk12);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk12", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ result = sql """show column stats ${tbl}(analyzetestlimitedk13);"""
+ assertEquals(1, result.size())
+ assertEquals("analyzetestlimitedk13", result[0][0])
+ assertEquals("0.0", result[0][1])
+ assertEquals("0.0", result[0][2])
+ assertEquals("0.0", result[0][3])
+ assertEquals("0.0", result[0][4])
+ assertEquals("0.0", result[0][5])
+ assertEquals("N/A", result[0][6])
+ assertEquals("N/A", result[0][7])
+
+ // Test trigger type.
+ sql """DROP DATABASE IF EXISTS trigger"""
+ sql """CREATE DATABASE IF NOT EXISTS trigger"""
+ sql """USE trigger"""
+ sql """
+ CREATE TABLE if not exists trigger_test(
+ `id` int NOT NULL,
+ `name` VARCHAR(152)
+ )ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """insert into trigger_test values(1,'name1') """
+ sql """analyze database trigger PROPERTIES("use.auto.analyzer"="true")"""
+
+ int i = 0;
+ for (0; i < 10; i++) {
+ def result = sql """show column stats trigger_test"""
+ if (result.size() != 2) {
+ Thread.sleep(1000)
+ continue;
+ }
+ assertEquals(result[0][10], "SYSTEM")
+ assertEquals(result[1][10], "SYSTEM")
+ break
+ }
+ if (i < 10) {
+ sql """analyze table trigger_test with sync"""
+ def result = sql """show column stats trigger_test"""
+ assertEquals(result.size(), 2)
+ assertEquals(result[0][10], "MANUAL")
+ assertEquals(result[1][10], "MANUAL")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]