This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new a4853a6ea47 [improvement](statistics)Support get index row count and
table delta rows. (#38492)
a4853a6ea47 is described below
commit a4853a6ea47f0284e11e0d738dd1ea984378d7aa
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Jul 31 10:38:34 2024 +0800
[improvement](statistics)Support get index row count and table delta rows.
(#38492)
Provide get row count for a given index id and get table delta rows API
in TableStatsMeta class. This is for Nereids stats calculator to fetch
row count.
---
fe/fe-core/src/main/cup/sql_parser.cup | 7 +++-
.../doris/analysis/AlterColumnStatsStmt.java | 4 +++
.../apache/doris/analysis/ShowTableStatsStmt.java | 42 ++++++++++++++++++++--
.../org/apache/doris/statistics/AnalysisInfo.java | 7 ++++
.../apache/doris/statistics/BaseAnalysisTask.java | 8 +++++
.../doris/statistics/StatisticsRepository.java | 13 +++++--
.../apache/doris/statistics/TableStatsMeta.java | 42 ++++++++++++++++++++++
.../suites/statistics/test_analyze_mv.groovy | 39 ++++++++++++++++++++
8 files changed, 155 insertions(+), 7 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 9e42b34657c..d3b2bc4421b 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4599,7 +4599,12 @@ show_param ::=
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl
opt_partition_names:partitionNames opt_col_list:cols
{:
- RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
+ RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached,
null);
+ :}
+ /* show index stats */
+ | KW_INDEX KW_STATS table_name:tbl ident:id
+ {:
+ RESULT = new ShowTableStatsStmt(tbl, null, null, false, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols
opt_partition_names:partitionNames
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 40c8600381e..e5cc4bff614 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
throw new AnalysisException(optional.get() + " is invalid
statistics");
}
+ if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
+ throw new AnalysisException("Set column stats must set row_count.
e.g. 'row_count'='5'");
+ }
+
// get statsTypeToValue
properties.forEach((key, value) -> {
StatsType statsType = StatsType.fromString(key);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 5edfbe05886..36a0e9a5872 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -70,6 +70,13 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("row_count")
.build();
+ private static final ImmutableList<String> INDEX_TITLE_NAMES =
+ new ImmutableList.Builder<String>()
+ .add("table_name")
+ .add("index_name")
+ .add("row_count")
+ .build();
+
private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("index_name")
@@ -82,15 +89,17 @@ public class ShowTableStatsStmt extends ShowStmt {
private final List<String> columnNames;
private final PartitionNames partitionNames;
private final boolean cached;
+ private final String indexName;
private TableIf table;
public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
- PartitionNames partitionNames, boolean cached) {
+ PartitionNames partitionNames, boolean cached,
String indexName) {
this.tableName = tableName;
this.columnNames = columnNames;
this.partitionNames = partitionNames;
this.cached = cached;
+ this.indexName = indexName;
}
public TableName getTableName() {
@@ -141,8 +150,10 @@ public class ShowTableStatsStmt extends ShowStmt {
ShowResultSetMetaData.Builder builder =
ShowResultSetMetaData.builder();
ImmutableList<String> titles;
- // If columnNames != null, partitionNames is also not null. Guaranteed
in analyze()
- if (columnNames != null) {
+ if (indexName != null) {
+ titles = INDEX_TITLE_NAMES;
+ } else if (columnNames != null) {
+ // If columnNames != null, partitionNames is also not null.
Guaranteed in analyze()
titles = COLUMN_PARTITION_TITLE_NAMES;
} else if (partitionNames != null) {
titles = PARTITION_TITLE_NAMES;
@@ -160,6 +171,9 @@ public class ShowTableStatsStmt extends ShowStmt {
}
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
+ if (indexName != null) {
+ return constructIndexResultSet(tableStatistic);
+ }
if (partitionNames == null) {
return constructTableResultSet(tableStatistic);
}
@@ -238,6 +252,28 @@ public class ShowTableStatsStmt extends ShowStmt {
return new ShowResultSet(getMetaData(), result);
}
+ /**
+  * Build the result set for SHOW INDEX STATS: at most one row of
+  * (table_name, index_name, row_count) for the index named by indexName.
+  *
+  * @param tableStatistic stats meta the index row count is read from
+  * @return an empty result set when the table is not an OlapTable or when
+  *         getRowCount reports -1 for the index; otherwise a single row
+  * @throws RuntimeException if the table has no index named indexName
+  */
+ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
+     List<List<String>> result = Lists.newArrayList();
+     if (!(table instanceof OlapTable)) {
+         return new ShowResultSet(getMetaData(), result);
+     }
+     OlapTable olapTable = (OlapTable) table;
+     Long indexId = olapTable.getIndexIdByName(indexName);
+     if (indexId == null) {
+         throw new RuntimeException(String.format("Index %s not exist.", indexName));
+     }
+     // Reuse the id validated above rather than resolving the name again:
+     // a second lookup is redundant and its result would be unboxed unchecked.
+     long rowCount = tableStatistic.getRowCount(indexId);
+     if (rowCount == -1) {
+         return new ShowResultSet(getMetaData(), result);
+     }
+     List<String> row = Lists.newArrayList();
+     row.add(table.getName());
+     row.add(indexName);
+     row.add(String.valueOf(rowCount));
+     result.add(row);
+     return new ShowResultSet(getMetaData(), result);
+ }
+
public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta
tableStatistic) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 00e8dd0bdbb..6ec413821ea 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -40,6 +40,7 @@ import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
public class AnalysisInfo implements Writable {
@@ -196,6 +197,8 @@ public class AnalysisInfo implements Writable {
@SerializedName("ep")
public final boolean enablePartition;
+ public final ConcurrentMap<Long, Long> indexesRowCount = new
ConcurrentHashMap<>();
+
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long
catalogId, long dbId, long tblId,
Set<Pair<String, String>> jobColumns, Set<String> partitionNames,
String colName, Long indexId,
JobType jobType, AnalysisMethod analysisMethod, AnalysisType
analysisType,
@@ -350,4 +353,8 @@ public class AnalysisInfo implements Writable {
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}
+
+ /**
+  * Record the row count for an index in indexesRowCount, replacing any
+  * value previously stored under the same index id.
+  *
+  * @param indexId id of the index the count belongs to
+  * @param rowCount row count to store for that index
+  */
+ public void addIndexRowCount(long indexId, long rowCount) {
+ indexesRowCount.put(indexId, rowCount);
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 89514effb13..329231f3604 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -498,6 +498,14 @@ public abstract class BaseAnalysisTask {
try (AutoCloseConnectContext a =
StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new
ColStatsData(stmtExecutor.executeInternalQuery().get(0));
+ // Update index row count after analyze.
+ if (this instanceof OlapAnalysisTask) {
+ AnalysisInfo jobInfo =
Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
+ // For sync job, get jobInfo from job.jobInfo.
+ jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
+ long indexId = info.indexId == -1 ? ((OlapTable)
tbl).getBaseIndexId() : info.indexId;
+ jobInfo.addIndexRowCount(indexId, colStatsData.count);
+ }
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
job.appendBuf(this, Collections.singletonList(colStatsData));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 81192762f99..6202d40f1d9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
@@ -313,12 +314,13 @@ public class StatisticsRepository {
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
long indexId = alterColumnStatsStmt.getIndexId();
+ if (rowCount == null) {
+ throw new RuntimeException("Row count is null.");
+ }
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
- if (rowCount != null) {
- builder.setCount(Double.parseDouble(rowCount));
- }
+ builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
@@ -372,10 +374,15 @@ public class StatisticsRepository {
AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
+ .setRowCount((long) Double.parseDouble(rowCount))
.setJobColumns(Sets.newHashSet())
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
+ if (objects.table instanceof OlapTable) {
+ indexId = indexId == -1 ? ((OlapTable)
objects.table).getBaseIndexId() : indexId;
+ mockedJobInfo.addIndexRowCount(indexId, (long)
Double.parseDouble(rowCount));
+ }
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo,
objects.table);
} else {
// update partition granularity statistics
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 1048708958b..4296cb02526 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -35,6 +35,9 @@ import com.google.gson.annotations.SerializedName;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -82,6 +85,9 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
@SerializedName("pur")
public ConcurrentMap<Long, Long> partitionUpdateRows = new
ConcurrentHashMap<>();
+ @SerializedName("irc")
+ public ConcurrentMap<Long, Long> indexesRowCount = new
ConcurrentHashMap<>();
+
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
@@ -154,6 +160,10 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
}
jobType = analyzedJob.jobType;
if (tableIf != null) {
+ if (tableIf instanceof OlapTable) {
+ indexesRowCount.putAll(analyzedJob.indexesRowCount);
+ clearStaleIndexRowCount((OlapTable) tableIf);
+ }
rowCount = analyzedJob.rowCount;
if (rowCount == 0 &&
AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
return;
@@ -178,5 +188,37 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
if (partitionUpdateRows == null) {
partitionUpdateRows = new ConcurrentHashMap<>();
}
+ if (indexesRowCount == null) {
+ indexesRowCount = new ConcurrentHashMap<>();
+ }
+ }
+
+ /**
+  * Look up the row count recorded for one index.
+  *
+  * @param indexId id of the index to look up in indexesRowCount
+  * @return the recorded row count, or -1 when no entry exists for indexId
+  */
+ public long getRowCount(long indexId) {
+ return indexesRowCount.getOrDefault(indexId, -1L);
+ }
+
+ /**
+  * Drop row-count entries whose index id is no longer reported by the table,
+  * so indexesRowCount only keeps ids returned by table.getIndexIds().
+  *
+  * @param table table whose current index ids decide which entries are kept
+  */
+ private void clearStaleIndexRowCount(OlapTable table) {
+     Iterator<Long> iterator = indexesRowCount.keySet().iterator();
+     List<Long> indexIds = table.getIndexIds();
+     while (iterator.hasNext()) {
+         long key = iterator.next();
+         // An entry is stale only when its id is missing from the table;
+         // ids the table still has must be kept.
+         if (!indexIds.contains(key)) {
+             iterator.remove();
+         }
+     }
+ }
+
+ /**
+  * Compute updatedRows minus the largest ColStatsMeta.updatedRows found among
+  * the column stats entries whose key's first element equals the base index
+  * name of the given table.
+  * NOTE(review): presumably this is the number of rows changed since the most
+  * recently analyzed base-index column - confirm against the callers.
+  *
+  * @param table table whose base index name selects the entries to inspect
+  * @return the difference described above (the maximum defaults to 0 when no
+  *         entry matches), or -1 when colToColStatsMeta is null
+  */
+ public long getBaseIndexDeltaRowCount(OlapTable table) {
+ if (colToColStatsMeta == null) {
+ return -1;
+ }
+ long maxUpdateRows = 0;
+ String baseIndexName = table.getIndexNameById(table.getBaseIndexId());
+ for (Map.Entry<Pair<String, String>, ColStatsMeta> entry :
colToColStatsMeta.entrySet()) {
+ if (entry.getKey().first.equals(baseIndexName) &&
entry.getValue().updatedRows > maxUpdateRows) {
+ maxUpdateRows = entry.getValue().updatedRows;
+ }
+ }
+ return updatedRows.get() - maxUpdateRows;
}
}
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy
b/regression-test/suites/statistics/test_analyze_mv.groovy
index 704d4212557..862949b8c93 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -134,6 +134,28 @@ suite("test_analyze_mv") {
sql """analyze table mvTestDup with sync;"""
+ // Test show index row count
+ def result_row = sql """show index stats mvTestDup mvTestDup"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mvTestDup", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv1"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv1", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv2"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv2", result_row[0][1])
+ assertEquals("6", result_row[0][2])
+ result_row = sql """show index stats mvTestDup mv3"""
+ assertEquals(1, result_row.size())
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv3", result_row[0][1])
+ assertEquals("4", result_row[0][2])
+
// Compare show whole table column stats result with show single column.
def result_all = sql """show column stats mvTestDup"""
assertEquals(12, result_all.size())
@@ -411,6 +433,23 @@ suite("test_analyze_mv") {
assertEquals("4001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])
+ // Test alter table index row count.
+ sql """alter table mvTestDup modify column `value2` set stats
('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8',
'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mvTestDup;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mvTestDup", result_row[0][1])
+ assertEquals("150000000", result_row[0][2])
+ sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats
('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8',
'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mv1;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv1", result_row[0][1])
+ assertEquals("3443", result_row[0][2])
+ sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2```
set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0',
'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
+ result_row = sql """show index stats mvTestDup mv3;"""
+ assertEquals("mvTestDup", result_row[0][0])
+ assertEquals("mv3", result_row[0][1])
+ assertEquals("234234", result_row[0][2])
+
sql """drop stats mvTestDup"""
result_sample = sql """show column stats mvTestDup"""
assertEquals(0, result_sample.size())
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]