This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 415d9e06c2e branch-3.0: [improvement](statistics)Skip auto analyze
empty table. #43865 (#44035)
415d9e06c2e is described below
commit 415d9e06c2e2bd7f00db8625983362ddfeb7e994
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Nov 15 20:01:00 2024 +0800
branch-3.0: [improvement](statistics)Skip auto analyze empty table. #43865
(#44035)
Cherry-picked from #43865
Co-authored-by: James <[email protected]>
---
.../doris/statistics/StatisticsAutoCollector.java | 24 ++++++++++++-----
.../apache/doris/statistics/TableStatsMeta.java | 4 +++
.../statistics/StatisticsAutoCollectorTest.java | 2 +-
.../suites/statistics/test_analyze_mv.groovy | 31 +++++++++++++++++++++-
.../test_auto_analyze_black_white_list.groovy | 3 ++-
5 files changed, 54 insertions(+), 10 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 28c2bd95c96..38af43bef73 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -145,9 +145,6 @@ public class StatisticsAutoCollector extends MasterDaemon {
// appendMvColumn(table, columns);
appendAllColumns(table, columns);
columns = columns.stream().filter(c ->
StatisticsUtil.needAnalyzeColumn(table, c)).collect(Collectors.toSet());
- if (columns.isEmpty()) {
- return;
- }
AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns,
priority);
if (analyzeJob == null) {
return;
@@ -206,6 +203,8 @@ public class StatisticsAutoCollector extends MasterDaemon {
if (StatisticsUtil.enablePartitionAnalyze() &&
table.isPartitionedTable()) {
analysisMethod = AnalysisMethod.FULL;
}
+ AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
+ TableStatsMeta tableStatsStatus =
manager.findTableStatsStatus(table.getId());
if (table instanceof OlapTable &&
analysisMethod.equals(AnalysisMethod.SAMPLE)) {
OlapTable ot = (OlapTable) table;
if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) ==
TableIf.UNKNOWN_ROW_COUNT) {
@@ -213,10 +212,21 @@ public class StatisticsAutoCollector extends MasterDaemon
{
return null;
}
}
- AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
- TableStatsMeta tableStatsStatus =
manager.findTableStatsStatus(table.getId());
- long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0
:
- (table.getRowCount() <= 0 ? table.fetchRowCount() :
table.getRowCount());
+ // We don't auto analyze empty table to avoid all 0 stats.
+ // Because all 0 is more dangerous than unknown stats when row count
report is delayed.
+ long rowCount = table.getRowCount();
+ if (rowCount <= 0) {
+ LOG.info("Table {} is empty, remove its old stats and skip auto
analyze it.", table.getName());
+ // Remove the table's old stats if exists.
+ if (tableStatsStatus != null &&
!tableStatsStatus.isColumnsStatsEmpty()) {
+ manager.dropStats(table, null);
+ }
+ return null;
+ }
+ if (jobColumns == null || jobColumns.isEmpty()) {
+ return null;
+ }
+ LOG.info("Auto analyze table {} row count is {}", table.getName(),
rowCount);
StringJoiner stringJoiner = new StringJoiner(",", "[", "]");
for (Pair<String, String> pair : jobColumns) {
stringJoiner.add(pair.toString());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 9c8959d807b..6a7f2933996 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -255,4 +255,8 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
}
return updatedRows.get() - maxUpdateRows;
}
+
+ public boolean isColumnsStatsEmpty() {
+ return colToColStatsMeta == null || colToColStatsMeta.isEmpty();
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 9eb2004ec25..026b7fb65b0 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -166,6 +166,6 @@ public class StatisticsAutoCollectorTest {
return 100;
}
};
- Assertions.assertThrows(NullPointerException.class, () ->
collector.createAnalyzeJobForTbl(table, null, null));
+ Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null,
null));
}
}
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy
b/regression-test/suites/statistics/test_analyze_mv.groovy
index 77eb65789cd..64e86439be1 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -671,7 +671,7 @@ suite("test_analyze_mv") {
verifyTaskStatus(result_sample, "mva_MIN__`value3`", "mv3")
verifyTaskStatus(result_sample, "mva_SUM__CAST(`value1` AS bigint)", "mv3")
- // Test row count report and report for nereids
+ // * Test row count report and report for nereids
sql """truncate table mvTestDup"""
result_row = sql """show index stats mvTestDup mv3"""
assertEquals(1, result_row.size())
@@ -680,6 +680,18 @@ suite("test_analyze_mv") {
assertEquals("0", result_row[0][3])
assertEquals("-1", result_row[0][4])
+ // ** Embedded test for skip auto analyze when table is empty
+ sql """analyze table mvTestDup properties ("use.auto.analyzer" = "true")"""
+ def empty_test = sql """show auto analyze mvTestDup"""
+ assertEquals(0, empty_test.size())
+ empty_test = sql """show column stats mvTestDup"""
+ assertEquals(0, empty_test.size())
+ // ** End of embedded test
+
+ sql """analyze table mvTestDup with sync"""
+ empty_test = sql """show column stats mvTestDup"""
+ assertEquals(12, empty_test.size())
+
for (int i = 0; i < 120; i++) {
result_row = sql """show index stats mvTestDup mv3"""
logger.info("mv3 stats: " + result_row)
@@ -694,6 +706,23 @@ suite("test_analyze_mv") {
assertEquals("mv3", result_row[0][1])
assertEquals("0", result_row[0][3])
assertEquals("0", result_row[0][4])
+
+ // ** Embedded test for skip auto analyze when table is empty again
+ sql """analyze table mvTestDup properties ("use.auto.analyzer" = "true")"""
+ empty_test = sql """show auto analyze mvTestDup"""
+ assertEquals(0, empty_test.size())
+ empty_test = sql """show column stats mvTestDup"""
+ for (int i = 0; i < 100; i++) {
+ empty_test = sql """show column stats mvTestDup"""
+ if (empty_test.size() != 0) {
+ logger.info("async delete is not finished yet.")
+ Thread.sleep(1000)
+ }
+ break
+ }
+ assertEquals(0, empty_test.size())
+ // ** End of embedded test
+
sql """insert into mvTestDup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10,
20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001,
3001, 4001, 5001);"""
result_row = sql """show index stats mvTestDup mv3"""
assertEquals(1, result_row.size())
diff --git
a/regression-test/suites/statistics/test_auto_analyze_black_white_list.groovy
b/regression-test/suites/statistics/test_auto_analyze_black_white_list.groovy
index 89da74d8702..f27ecdc5e16 100644
---
a/regression-test/suites/statistics/test_auto_analyze_black_white_list.groovy
+++
b/regression-test/suites/statistics/test_auto_analyze_black_white_list.groovy
@@ -68,8 +68,9 @@ suite("test_auto_analyze_black_white_list") {
)
"""
+ sql """insert into test_bw values (1, 1, 1, 1, 1)"""
try {
- wait_row_count_reported("test_auto_analyze_black_white_list",
"test_bw", 0, 4, "0")
+ wait_row_count_reported("test_auto_analyze_black_white_list",
"test_bw", 0, 4, "1")
} catch (Exception e) {
logger.info(e.getMessage());
return;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]