This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 4f01c03a188 Improve show column stats performance. (#31298) (#31458)
4f01c03a188 is described below
commit 4f01c03a1889810257e9e380dc5667ed9598fe0d
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Feb 27 19:47:56 2024 +0800
Improve show column stats performance. (#31298) (#31458)
---
.../apache/doris/analysis/ShowColumnStatsStmt.java | 4 +++
.../java/org/apache/doris/qe/ShowExecutor.java | 32 +++++++++++++++--
.../doris/statistics/StatisticsRepository.java | 13 +++++++
.../suites/statistics/test_analyze_mv.groovy | 42 ++++++++++++++++++++--
4 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index be1a54eb8d2..a8d0284c138 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -179,4 +179,8 @@ public class ShowColumnStatsStmt extends ShowStmt {
public boolean isCached() {
return cached;
}
+
+ public boolean isAllColumns() {
+ return columnNames == null;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 1c05fd19027..ff9a41af076 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -200,6 +200,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.Histogram;
+import org.apache.doris.statistics.ResultRow;
import org.apache.doris.statistics.StatisticsRepository;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.query.QueryStatsUtil;
@@ -2487,7 +2488,35 @@ public class ShowExecutor {
Set<String> columnNames = showColumnStatsStmt.getColumnNames();
PartitionNames partitionNames = showColumnStatsStmt.getPartitionNames();
boolean showCache = showColumnStatsStmt.isCached();
+ boolean isAllColumns = showColumnStatsStmt.isAllColumns();
+ if (isAllColumns && !showCache && partitionNames == null) {
+ getStatsForAllColumns(columnStatistics, tableIf);
+ } else {
+ getStatsForSpecifiedColumns(columnStatistics, columnNames, tableIf, showCache, tableName, partitionNames);
+ }
+ resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
+ }
+
+ private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
+ TableIf tableIf) throws AnalysisException {
+ List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId());
+ for (ResultRow row : resultRows) {
+ String indexName = "N/A";
+ long indexId = Long.parseLong(row.get(4));
+ if (indexId != -1) {
+ indexName = ((OlapTable) tableIf).getIndexNameById(indexId);
+ if (indexName == null) {
+ continue;
+ }
+ }
+ columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), ColumnStatistic.fromResultRow(row)));
+ }
+ }
+ private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
+ Set<String> columnNames, TableIf tableIf, boolean showCache,
+ TableName tableName, PartitionNames partitionNames)
+ throws AnalysisException {
for (String colName : columnNames) {
// Olap base index use -1 as index id.
List<Long> indexIds = Lists.newArrayList();
@@ -2518,13 +2547,12 @@ public class ShowExecutor {
} else {
String finalIndexName = indexName;
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
- colName, showColumnStatsStmt.getPartitionNames().getPartitionNames())
+ colName, partitionNames.getPartitionNames())
.stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s))
.collect(Collectors.toList()));
}
}
}
- resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
}
public void handleShowColumnHist() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 3bf5b9fc019..75b827ecd9d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -104,6 +104,11 @@ public class StatisticsRepository {
+ " ${inPredicate}"
+ " AND part_id IS NOT NULL";
+ private static final String FETCH_TABLE_STATISTICS = "SELECT * FROM "
+ + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME
+ + " WHERE tbl_id = ${tblId}"
+ + " AND part_id IS NULL";
+
public static ColumnStatistic queryColumnStatisticsByName(long tableId, long indexId, String colName) {
ResultRow resultRow = queryColumnStatisticById(tableId, indexId, colName);
if (resultRow == null) {
@@ -128,6 +133,14 @@ public class StatisticsRepository {
Collectors.toList());
}
+ public static List<ResultRow> queryColumnStatisticsForTable(long tableId)
+ throws AnalysisException {
+ Map<String, String> params = new HashMap<>();
+ params.put("tblId", String.valueOf(tableId));
+ List<ResultRow> rows = StatisticsUtil.executeQuery(FETCH_TABLE_STATISTICS, params);
+ return rows == null ? Collections.emptyList() : rows;
+ }
+
public static ResultRow queryColumnStatisticById(long tblId, long indexId, String colName) {
return queryColumnStatisticById(tblId, indexId, colName, false);
}
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy
index 65571c65bf4..d5611cd985b 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -74,6 +74,25 @@ suite("test_analyze_mv") {
}
}
+ def verify_column_stats = { all_column_result, one_column_result ->
+ logger.info("all column result: " + all_column_result)
+ logger.info("one column result: " + one_column_result)
+ boolean found = false;
+ for (int i = 0; i < all_column_result.size(); i++) {
+ if (all_column_result[i][0] == one_column_result[0] && all_column_result[i][1] == one_column_result[1]) {
+ assertEquals(all_column_result[i][2], one_column_result[2])
+ assertEquals(all_column_result[i][3], one_column_result[3])
+ assertEquals(all_column_result[i][4], one_column_result[4])
+ assertEquals(all_column_result[i][5], one_column_result[5])
+ assertEquals(all_column_result[i][6], one_column_result[6])
+ assertEquals(all_column_result[i][7], one_column_result[7])
+ assertEquals(all_column_result[i][8], one_column_result[8])
+ found = true;
+ }
+ }
+ assertTrue(found)
+ }
+
sql """drop database if exists test_analyze_mv"""
sql """create database test_analyze_mv"""
sql """use test_analyze_mv"""
@@ -102,10 +121,13 @@ suite("test_analyze_mv") {
sql """analyze table mvTestDup with sync;"""
- def result_sample = sql """show column stats mvTestDup"""
- assertEquals(12, result_sample.size())
+ // Compare show whole table column stats result with show single column.
+ def result_all = sql """show column stats mvTestDup"""
+ assertEquals(12, result_all.size())
+ def result_all_cached = sql """show column cached stats mvTestDup"""
+ assertEquals(12, result_all_cached.size())
- result_sample = sql """show column stats mvTestDup(key1)"""
+ def result_sample = sql """show column stats mvTestDup(key1)"""
assertEquals(1, result_sample.size())
assertEquals("key1", result_sample[0][0])
assertEquals("N/A", result_sample[0][1])
@@ -114,6 +136,8 @@ suite("test_analyze_mv") {
assertEquals("1", result_sample[0][7])
assertEquals("1001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
result_sample = sql """show column stats mvTestDup(value1)"""
assertEquals(1, result_sample.size())
@@ -124,6 +148,8 @@ suite("test_analyze_mv") {
assertEquals("3", result_sample[0][7])
assertEquals("3001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
result_sample = sql """show column stats mvTestDup(mv_key1)"""
assertEquals(2, result_sample.size())
@@ -138,6 +164,10 @@ suite("test_analyze_mv") {
assertEquals("1", result_sample[0][7])
assertEquals("1001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
+ verify_column_stats(result_all, result_sample[1])
+ verify_column_stats(result_all_cached, result_sample[1])
result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS BIGINT)`)"""
assertEquals(1, result_sample.size())
@@ -148,6 +178,8 @@ suite("test_analyze_mv") {
assertEquals("6", result_sample[0][7])
assertEquals("3001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)"""
assertEquals(1, result_sample.size())
@@ -158,6 +190,8 @@ suite("test_analyze_mv") {
assertEquals("4", result_sample[0][7])
assertEquals("4001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)"""
assertEquals(1, result_sample.size())
@@ -168,6 +202,8 @@ suite("test_analyze_mv") {
assertEquals("5", result_sample[0][7])
assertEquals("5001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ verify_column_stats(result_all, result_sample[0])
+ verify_column_stats(result_all_cached, result_sample[0])
sql """CREATE TABLE mvTestAgg (
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]