This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new c78e04ac31a [improvement](statistics)Support get index row count and
table delta rows. (#38520)
c78e04ac31a is described below
commit c78e04ac31a04ae389e812dea3e96469e3146ed2
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Jul 31 10:39:21 2024 +0800
[improvement](statistics)Support get index row count and table delta rows.
(#38520)
backport: https://github.com/apache/doris/pull/38492
---
fe/fe-core/src/main/cup/sql_parser.cup | 7 ++-
.../doris/analysis/AlterColumnStatsStmt.java | 4 ++
.../apache/doris/analysis/ShowTableStatsStmt.java | 72 ++++++++++++++++------
.../org/apache/doris/statistics/AnalysisInfo.java | 8 +++
.../apache/doris/statistics/BaseAnalysisTask.java | 9 +++
.../doris/statistics/StatisticsRepository.java | 13 +++-
.../apache/doris/statistics/TableStatsMeta.java | 32 +++++++++-
.../suites/statistics/test_analyze_mv.groovy | 39 ++++++++++++
8 files changed, 161 insertions(+), 23 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index f5b5ead9cc6..90534059cf9 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4362,7 +4362,12 @@ show_param ::=
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl
opt_partition_names:partitionNames
{:
- RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
+ RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null);
+ :}
+ /* show index stats */
+ | KW_INDEX KW_STATS table_name:tbl ident:id
+ {:
+ RESULT = new ShowTableStatsStmt(tbl, null, false, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols
opt_partition_names:partitionNames
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 9e76b065921..9aebeb59b82 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
throw new AnalysisException(optional.get() + " is invalid
statistics");
}
+ if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
+ throw new AnalysisException("Set column stats must set row_count.
e.g. 'row_count'='5'");
+ }
+
// get statsTypeToValue
properties.forEach((key, value) -> {
StatsType statsType = StatsType.fromString(key);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 8d8cd32c454..fc0860dfd13 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -20,6 +20,7 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
@@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("user_inject")
.build();
- private final TableName tableName;
+ private static final ImmutableList<String> INDEX_TITLE_NAMES =
+ new ImmutableList.Builder<String>()
+ .add("table_name")
+ .add("index_name")
+ .add("row_count")
+ .build();
+ private final TableName tableName;
private final PartitionNames partitionNames;
private final boolean cached;
+ private final String indexName;
private TableIf table;
- public ShowTableStatsStmt(TableName tableName, PartitionNames
partitionNames, boolean cached) {
+ public ShowTableStatsStmt(TableName tableName, PartitionNames
partitionNames, boolean cached, String indexName) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
+ this.indexName = indexName;
}
public TableName getTableName() {
@@ -117,7 +126,13 @@ public class ShowTableStatsStmt extends ShowStmt {
public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder =
ShowResultSetMetaData.builder();
- for (String title : TITLE_NAMES) {
+ ImmutableList<String> titles;
+ if (indexName != null) {
+ titles = INDEX_TITLE_NAMES;
+ } else {
+ titles = TITLE_NAMES;
+ }
+ for (String title : titles) {
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
}
return builder.build();
@@ -127,15 +142,29 @@ public class ShowTableStatsStmt extends ShowStmt {
return table;
}
- public long getPartitionId() {
- if (partitionNames == null) {
- return 0;
+ public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
+ if (indexName != null) {
+ return constructIndexResultSet(tableStatistic);
}
- String partitionName = partitionNames.getPartitionNames().get(0);
- return table.getPartition(partitionName).getId();
+ return constructTableResultSet(tableStatistic);
}
- public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
+ public ShowResultSet constructResultSet(long rowCount) {
+ List<List<String>> result = Lists.newArrayList();
+ List<String> row = Lists.newArrayList();
+ row.add("");
+ row.add("");
+ row.add(String.valueOf(rowCount));
+ row.add("");
+ row.add("");
+ row.add("");
+ row.add("");
+ row.add("");
+ result.add(row);
+ return new ShowResultSet(getMetaData(), result);
+ }
+
+ public ShowResultSet constructTableResultSet(TableStatsMeta
tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd
HH:mm:ss");
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
@@ -147,7 +176,7 @@ public class ShowTableStatsStmt extends ShowStmt {
row.add(String.valueOf(tableStatistic.rowCount));
LocalDateTime dateTime =
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
- java.time.ZoneId.systemDefault());
+ java.time.ZoneId.systemDefault());
String formattedDateTime = dateTime.format(formatter);
row.add(formattedDateTime);
row.add(tableStatistic.analyzeColumns().toString());
@@ -158,17 +187,24 @@ public class ShowTableStatsStmt extends ShowStmt {
return new ShowResultSet(getMetaData(), result);
}
- public ShowResultSet constructResultSet(long rowCount) {
+ /**
+ * Builds the SHOW INDEX STATS result: one row of (table_name, index_name, row_count).
+ * Returns an empty result when the table is not an OlapTable, when no table stats
+ * are available, or when no row count is recorded for the index.
+ *
+ * @param tableStatistic stats meta of the table, may be null
+ * @throws RuntimeException if the table has no index named indexName
+ */
+ public ShowResultSet constructIndexResultSet(TableStatsMeta
tableStatistic) {
List<List<String>> result = Lists.newArrayList();
+ if (!(table instanceof OlapTable)) {
+ return new ShowResultSet(getMetaData(), result);
+ }
+ OlapTable olapTable = (OlapTable) table;
+ Long indexId = olapTable.getIndexIdByName(indexName);
+ if (indexId == null) {
+ throw new RuntimeException(String.format("Index %s not exist.",
indexName));
+ }
+ // No stats recorded: return an empty result, as constructTableResultSet does.
+ if (tableStatistic == null) {
+ return new ShowResultSet(getMetaData(), result);
+ }
+ // Reuse the id validated above; a second, unchecked lookup could unbox null.
+ long rowCount = tableStatistic.getRowCount(indexId);
+ if (rowCount == -1) {
+ return new ShowResultSet(getMetaData(), result);
+ }
List<String> row = Lists.newArrayList();
- row.add("");
- row.add("");
+ row.add(table.getName());
+ row.add(indexName);
row.add(String.valueOf(rowCount));
- row.add("");
- row.add("");
- row.add("");
- row.add("");
- row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index ab7f9935c72..125b23bce7b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -38,6 +38,8 @@ import java.text.ParseException;
import java.util.List;
import java.util.Set;
import java.util.StringJoiner;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
public class AnalysisInfo implements Writable {
@@ -202,6 +204,8 @@ public class AnalysisInfo implements Writable {
public final boolean userInject;
+ public final ConcurrentMap<Long, Long> indexesRowCount = new
ConcurrentHashMap<>();
+
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long
catalogId, long dbId, long tblId,
List<Pair<String, String>> jobColumns, Set<String> partitionNames,
String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod
analysisMethod, AnalysisType analysisType,
@@ -350,4 +354,8 @@ public class AnalysisInfo implements Writable {
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}
+
+ public void addIndexRowCount(long indexId, long rowCount) {
+ indexesRowCount.put(indexId, rowCount);
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index e04a7274f69..85a2fd0de3f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.datasource.CatalogIf;
@@ -333,6 +334,14 @@ public abstract class BaseAnalysisTask {
try (AutoCloseConnectContext a =
StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new
ColStatsData(stmtExecutor.executeInternalQuery().get(0));
+ // Update index row count after analyze.
+ if (this instanceof OlapAnalysisTask) {
+ AnalysisInfo jobInfo =
Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
+ // For sync job, get jobInfo from job.jobInfo.
+ jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
+ long indexId = info.indexId == -1 ? ((OlapTable)
tbl).getBaseIndexId() : info.indexId;
+ jobInfo.addIndexRowCount(indexId, colStatsData.count);
+ }
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
job.appendBuf(this, Collections.singletonList(colStatsData));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 87ce90c5300..4d2aede413b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
@@ -278,12 +279,13 @@ public class StatisticsRepository {
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
long indexId = alterColumnStatsStmt.getIndexId();
+ if (rowCount == null) {
+ throw new RuntimeException("Row count is null.");
+ }
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
- if (rowCount != null) {
- builder.setCount(Double.parseDouble(rowCount));
- }
+ builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
@@ -338,9 +340,14 @@ public class StatisticsRepository {
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
.setJobColumns(Lists.newArrayList())
+ .setRowCount((long) Double.parseDouble(rowCount))
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
+ if (objects.table instanceof OlapTable) {
+ indexId = indexId == -1 ? ((OlapTable)
objects.table).getBaseIndexId() : indexId;
+ mockedJobInfo.addIndexRowCount(indexId, (long)
Double.parseDouble(rowCount));
+ }
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo,
objects.table);
} else {
// update partition granularity statistics
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index a9a580c8b40..bb2f19ac25c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
+import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.JobType;
@@ -33,6 +34,8 @@ import com.google.gson.annotations.SerializedName;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -40,7 +43,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
-public class TableStatsMeta implements Writable {
+public class TableStatsMeta implements Writable, GsonPostProcessable {
@SerializedName("tblId")
public final long tblId;
@@ -77,6 +80,9 @@ public class TableStatsMeta implements Writable {
@SerializedName("userInjected")
public boolean userInjected;
+ @SerializedName("irc")
+ public ConcurrentMap<Long, Long> indexesRowCount = new
ConcurrentHashMap<>();
+
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
@@ -142,6 +148,8 @@ public class TableStatsMeta implements Writable {
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.rowCount;
+ indexesRowCount.putAll(analyzedJob.indexesRowCount);
+ clearStaleIndexRowCount((OlapTable) tableIf);
}
if (analyzedJob.emptyJob) {
return;
@@ -166,4 +174,26 @@ public class TableStatsMeta implements Writable {
public void convertDeprecatedColStatsToNewVersion() {
deprecatedColNameToColStatsMeta = null;
}
+
+ @Override
+ public void gsonPostProcess() throws IOException {
+ if (indexesRowCount == null) {
+ indexesRowCount = new ConcurrentHashMap<>();
+ }
+ }
+
+ public long getRowCount(long indexId) {
+ return indexesRowCount.getOrDefault(indexId, -1L);
+ }
+
+ /**
+ * Drops row-count entries whose index id is no longer one of the table's index ids.
+ * Entries for indexes the table still has are kept.
+ */
+ private void clearStaleIndexRowCount(OlapTable table) {
+ Iterator<Long> iterator = indexesRowCount.keySet().iterator();
+ List<Long> indexIds = table.getIndexIds();
+ while (iterator.hasNext()) {
+ long key = iterator.next();
+ // Stale means the id is absent from the table; removing ids that are present
+ // would wipe the counts that update() has just stored.
+ if (!indexIds.contains(key)) {
+ iterator.remove();
+ }
+ }
+ }
}
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy
b/regression-test/suites/statistics/test_analyze_mv.groovy
index 9128ee2b263..fa9b0701b1a 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -133,6 +133,28 @@ suite("test_analyze_mv") {
sql """analyze table mvTestDup with sync;"""
+ // Test show index row count
+ def result_row = sql """show index stats mvTestDup mvTestDup"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mvTestDup", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv1"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv1", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv2"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv2", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv3"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv3", result_row[0][1])
+ assertEquals("4", result_row[0][2])
+
// Compare show whole table column stats result with show single column.
def result_all = sql """show column stats mvTestDup"""
assertEquals(12, result_all.size())
@@ -410,6 +432,23 @@ suite("test_analyze_mv") {
assertEquals("4001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ // Test alter table index row count.
+ sql """alter table mvTestDup modify column `value2` set stats
('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8',
'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mvTestDup;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mvTestDup", result_row[0][1])
+ assertEquals("150000000", result_row[0][2])
+ sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats
('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8',
'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mv1;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv1", result_row[0][1])
+ assertEquals("3443", result_row[0][2])
+ sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2```
set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0',
'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mv3;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv3", result_row[0][1])
+ assertEquals("234234", result_row[0][2])
+
sql """drop stats mvTestDup"""
result_sample = sql """show column stats mvTestDup"""
assertEquals(0, result_sample.size())
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]