This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new a4853a6ea47 [improvement](statistics)Support get index row count and 
table delta rows. (#38492)
a4853a6ea47 is described below

commit a4853a6ea47f0284e11e0d738dd1ea984378d7aa
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Jul 31 10:38:34 2024 +0800

    [improvement](statistics)Support get index row count and table delta rows. 
(#38492)
    
    Provide APIs in the TableStatsMeta class to get the row count for a given
    index id and to get the table's delta rows. These are used by the Nereids
    stats calculator to fetch row counts.
---
 fe/fe-core/src/main/cup/sql_parser.cup             |  7 +++-
 .../doris/analysis/AlterColumnStatsStmt.java       |  4 +++
 .../apache/doris/analysis/ShowTableStatsStmt.java  | 42 ++++++++++++++++++++--
 .../org/apache/doris/statistics/AnalysisInfo.java  |  7 ++++
 .../apache/doris/statistics/BaseAnalysisTask.java  |  8 +++++
 .../doris/statistics/StatisticsRepository.java     | 13 +++++--
 .../apache/doris/statistics/TableStatsMeta.java    | 42 ++++++++++++++++++++++
 .../suites/statistics/test_analyze_mv.groovy       | 39 ++++++++++++++++++++
 8 files changed, 155 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index 9e42b34657c..d3b2bc4421b 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4599,7 +4599,12 @@ show_param ::=
     /* show table stats */
     | KW_TABLE opt_cached:cached KW_STATS table_name:tbl 
opt_partition_names:partitionNames opt_col_list:cols
     {:
-        RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
+        RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached, 
null);
+    :}
+    /* show index stats */
+    | KW_INDEX KW_STATS table_name:tbl ident:id
+    {:
+        RESULT = new ShowTableStatsStmt(tbl, null, null, false, id);
     :}
     /* show column stats */
     | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols 
opt_partition_names:partitionNames
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 40c8600381e..e5cc4bff614 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
             throw new AnalysisException(optional.get() + " is invalid 
statistics");
         }
 
+        if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
+            throw new AnalysisException("Set column stats must set row_count. 
e.g. 'row_count'='5'");
+        }
+
         // get statsTypeToValue
         properties.forEach((key, value) -> {
             StatsType statsType = StatsType.fromString(key);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 5edfbe05886..36a0e9a5872 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -70,6 +70,13 @@ public class ShowTableStatsStmt extends ShowStmt {
                     .add("row_count")
                     .build();
 
+    private static final ImmutableList<String> INDEX_TITLE_NAMES =
+            new ImmutableList.Builder<String>()
+            .add("table_name")
+            .add("index_name")
+            .add("row_count")
+            .build();
+
     private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
             new ImmutableList.Builder<String>()
                 .add("index_name")
@@ -82,15 +89,17 @@ public class ShowTableStatsStmt extends ShowStmt {
     private final List<String> columnNames;
     private final PartitionNames partitionNames;
     private final boolean cached;
+    private final String indexName;
 
     private TableIf table;
 
     public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
-                              PartitionNames partitionNames, boolean cached) {
+                              PartitionNames partitionNames, boolean cached, 
String indexName) {
         this.tableName = tableName;
         this.columnNames = columnNames;
         this.partitionNames = partitionNames;
         this.cached = cached;
+        this.indexName = indexName;
     }
 
     public TableName getTableName() {
@@ -141,8 +150,10 @@ public class ShowTableStatsStmt extends ShowStmt {
         ShowResultSetMetaData.Builder builder = 
ShowResultSetMetaData.builder();
 
         ImmutableList<String> titles;
-        // If columnNames != null, partitionNames is also not null. Guaranteed 
in analyze()
-        if (columnNames != null) {
+        if (indexName != null) {
+            titles = INDEX_TITLE_NAMES;
+        } else if (columnNames != null) {
+            // If columnNames != null, partitionNames is also not null. 
Guaranteed in analyze()
             titles = COLUMN_PARTITION_TITLE_NAMES;
         } else if (partitionNames != null) {
             titles = PARTITION_TITLE_NAMES;
@@ -160,6 +171,9 @@ public class ShowTableStatsStmt extends ShowStmt {
     }
 
     public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
+        if (indexName != null) {
+            return constructIndexResultSet(tableStatistic);
+        }
         if (partitionNames == null) {
             return constructTableResultSet(tableStatistic);
         }
@@ -238,6 +252,28 @@ public class ShowTableStatsStmt extends ShowStmt {
         return new ShowResultSet(getMetaData(), result);
     }
 
+    /**
+     * Builds the result set for {@code SHOW INDEX STATS <table> <index>}.
+     *
+     * <p>The result is empty when the table is not an {@link OlapTable} or when the
+     * looked-up row count is -1; otherwise it holds a single row with the table name,
+     * the index name and the row count.
+     *
+     * @param tableStatistic table statistics that hold the per-index row counts
+     * @return a result set with zero or one row, using the columns from {@code getMetaData()}
+     * @throws RuntimeException if the table has no index named {@code indexName}
+     */
+    public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
+        List<List<String>> result = Lists.newArrayList();
+        if (!(table instanceof OlapTable)) {
+            return new ShowResultSet(getMetaData(), result);
+        }
+        OlapTable olapTable = (OlapTable) table;
+        Long indexId = olapTable.getIndexIdByName(indexName);
+        if (indexId == null) {
+            throw new RuntimeException(String.format("Index %s not exist.", indexName));
+        }
+        // Reuse the id that was just null-checked instead of resolving the name a second
+        // time; a repeated lookup is redundant and its result would be unboxed unchecked.
+        long rowCount = tableStatistic.getRowCount(indexId);
+        if (rowCount == -1) {
+            return new ShowResultSet(getMetaData(), result);
+        }
+        List<String> row = Lists.newArrayList();
+        row.add(table.getName());
+        row.add(indexName);
+        row.add(String.valueOf(rowCount));
+        result.add(row);
+        return new ShowResultSet(getMetaData(), result);
+    }
+
     public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta 
tableStatistic) {
         List<List<String>> result = Lists.newArrayList();
         if (!(table instanceof OlapTable)) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 00e8dd0bdbb..6ec413821ea 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -40,6 +40,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.StringJoiner;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 
 public class AnalysisInfo implements Writable {
 
@@ -196,6 +197,8 @@ public class AnalysisInfo implements Writable {
     @SerializedName("ep")
     public final boolean enablePartition;
 
+    public final ConcurrentMap<Long, Long> indexesRowCount = new 
ConcurrentHashMap<>();
+
     public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long 
catalogId, long dbId, long tblId,
             Set<Pair<String, String>> jobColumns, Set<String> partitionNames, 
String colName, Long indexId,
             JobType jobType, AnalysisMethod analysisMethod, AnalysisType 
analysisType,
@@ -350,4 +353,8 @@ public class AnalysisInfo implements Writable {
     public TableIf getTable() {
         return StatisticsUtil.findTable(catalogId, dbId, tblId);
     }
+
+    /**
+     * Records the row count collected for one index in {@code indexesRowCount}.
+     * A value already stored for the same index id is replaced.
+     *
+     * @param indexId id of the index the count belongs to
+     * @param rowCount row count to store for that index
+     */
+    public void addIndexRowCount(long indexId, long rowCount) {
+        this.indexesRowCount.put(Long.valueOf(indexId), Long.valueOf(rowCount));
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 89514effb13..329231f3604 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -498,6 +498,14 @@ public abstract class BaseAnalysisTask {
         try (AutoCloseConnectContext a  = 
StatisticsUtil.buildConnectContext()) {
             stmtExecutor = new StmtExecutor(a.connectContext, sql);
             ColStatsData colStatsData = new 
ColStatsData(stmtExecutor.executeInternalQuery().get(0));
+            // Update index row count after analyze.
+            if (this instanceof OlapAnalysisTask) {
+                AnalysisInfo jobInfo = 
Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
+                // For sync job, get jobInfo from job.jobInfo.
+                jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
+                long indexId = info.indexId == -1 ? ((OlapTable) 
tbl).getBaseIndexId() : info.indexId;
+                jobInfo.addIndexRowCount(indexId, colStatsData.count);
+            }
             
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
             queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
             job.appendBuf(this, Collections.singletonList(colStatsData));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 81192762f99..6202d40f1d9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt;
 import org.apache.doris.analysis.TableName;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
@@ -313,12 +314,13 @@ public class StatisticsRepository {
         String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
         String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
         long indexId = alterColumnStatsStmt.getIndexId();
+        if (rowCount == null) {
+            throw new RuntimeException("Row count is null.");
+        }
         ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
         String colName = alterColumnStatsStmt.getColumnName();
         Column column = objects.table.getColumn(colName);
-        if (rowCount != null) {
-            builder.setCount(Double.parseDouble(rowCount));
-        }
+        builder.setCount(Double.parseDouble(rowCount));
         if (ndv != null) {
             double dNdv = Double.parseDouble(ndv);
             builder.setNdv(dNdv);
@@ -372,10 +374,15 @@ public class StatisticsRepository {
             AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
                     .setTblUpdateTime(System.currentTimeMillis())
                     .setColName("")
+                    .setRowCount((long) Double.parseDouble(rowCount))
                     .setJobColumns(Sets.newHashSet())
                     .setUserInject(true)
                     .setJobType(AnalysisInfo.JobType.MANUAL)
                     .build();
+            if (objects.table instanceof OlapTable) {
+                indexId = indexId == -1 ? ((OlapTable) 
objects.table).getBaseIndexId() : indexId;
+                mockedJobInfo.addIndexRowCount(indexId, (long) 
Double.parseDouble(rowCount));
+            }
             
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo,
 objects.table);
         } else {
             // update partition granularity statistics
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 1048708958b..4296cb02526 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -35,6 +35,9 @@ import com.google.gson.annotations.SerializedName;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -82,6 +85,9 @@ public class TableStatsMeta implements Writable, 
GsonPostProcessable {
     @SerializedName("pur")
     public ConcurrentMap<Long, Long> partitionUpdateRows = new 
ConcurrentHashMap<>();
 
+    @SerializedName("irc")
+    public ConcurrentMap<Long, Long> indexesRowCount = new 
ConcurrentHashMap<>();
+
     @VisibleForTesting
     public TableStatsMeta() {
         tblId = 0;
@@ -154,6 +160,10 @@ public class TableStatsMeta implements Writable, 
GsonPostProcessable {
         }
         jobType = analyzedJob.jobType;
         if (tableIf != null) {
+            if (tableIf instanceof OlapTable) {
+                indexesRowCount.putAll(analyzedJob.indexesRowCount);
+                clearStaleIndexRowCount((OlapTable) tableIf);
+            }
             rowCount = analyzedJob.rowCount;
             if (rowCount == 0 && 
AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
                 return;
@@ -178,5 +188,37 @@ public class TableStatsMeta implements Writable, 
GsonPostProcessable {
         if (partitionUpdateRows == null) {
             partitionUpdateRows = new ConcurrentHashMap<>();
         }
+        if (indexesRowCount == null) {
+            indexesRowCount = new ConcurrentHashMap<>();
+        }
+    }
+
+    /**
+     * Looks up the row count stored for an index.
+     *
+     * @param indexId id of the index to look up
+     * @return the stored row count, or -1 when {@code indexesRowCount} has no entry for the id
+     */
+    public long getRowCount(long indexId) {
+        Long recorded = indexesRowCount.get(indexId);
+        return recorded == null ? -1L : recorded;
+    }
+
+    /**
+     * Prunes {@code indexesRowCount} against the index ids reported by the table:
+     * every entry whose key is contained in {@code table.getIndexIds()} is removed.
+     *
+     * <p>NOTE(review): the method name suggests the opposite test (drop ids the table
+     * no longer has). Confirm what {@code getIndexIds()} returns before changing the
+     * condition.
+     *
+     * @param table table whose index ids are compared with the stored keys
+     */
+    private void clearStaleIndexRowCount(OlapTable table) {
+        List<Long> tableIndexIds = table.getIndexIds();
+        Iterator<Long> storedIds = indexesRowCount.keySet().iterator();
+        while (storedIds.hasNext()) {
+            if (tableIndexIds.contains(storedIds.next())) {
+                storedIds.remove();
+            }
+        }
+    }
+
+    /**
+     * Computes a delta row count for the table's base index: the table-level
+     * {@code updatedRows} counter minus the largest {@code updatedRows} value (never
+     * less than 0) among the column stats whose key's first element equals the base
+     * index name.
+     *
+     * @param table table that supplies the base index id and name
+     * @return the difference described above, or -1 when {@code colToColStatsMeta} is null
+     */
+    public long getBaseIndexDeltaRowCount(OlapTable table) {
+        if (colToColStatsMeta == null) {
+            return -1;
+        }
+        String baseIndexName = table.getIndexNameById(table.getBaseIndexId());
+        long largestColumnUpdate = 0;
+        for (Map.Entry<Pair<String, String>, ColStatsMeta> entry : colToColStatsMeta.entrySet()) {
+            // Only columns recorded under the base index take part in the maximum.
+            if (!entry.getKey().first.equals(baseIndexName)) {
+                continue;
+            }
+            long columnUpdate = entry.getValue().updatedRows;
+            if (columnUpdate > largestColumnUpdate) {
+                largestColumnUpdate = columnUpdate;
+            }
+        }
+        return updatedRows.get() - largestColumnUpdate;
+    }
 }
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy 
b/regression-test/suites/statistics/test_analyze_mv.groovy
index 704d4212557..862949b8c93 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -134,6 +134,28 @@ suite("test_analyze_mv") {
 
     sql """analyze table mvTestDup with sync;"""
 
+    // Test show index row count
+    def result_row = sql """show index stats mvTestDup mvTestDup"""
+    assertEquals(1, result_row.size())
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mvTestDup", result_row[0][1])
+    assertEquals("6", result_row[0][2])
+    result_row = sql """show index stats mvTestDup mv1"""
+    assertEquals(1, result_row.size())
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mv1", result_row[0][1])
+    assertEquals("6", result_row[0][2])
+    result_row = sql """show index stats mvTestDup mv2"""
+    assertEquals(1, result_row.size())
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mv2", result_row[0][1])
+    assertEquals("6", result_row[0][2])
+    result_row = sql """show index stats mvTestDup mv3"""
+    assertEquals(1, result_row.size())
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mv3", result_row[0][1])
+    assertEquals("4", result_row[0][2])
+
     // Compare show whole table column stats result with show single column.
     def result_all = sql """show column stats mvTestDup"""
     assertEquals(12, result_all.size())
@@ -411,6 +433,23 @@ suite("test_analyze_mv") {
     assertEquals("4001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
 
+    // Test alter table index row count.
+    sql """alter table mvTestDup modify column `value2` set stats 
('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 
'min_value'='1', 'max_value'='10');"""
+    result_row = sql """show index stats mvTestDup mvTestDup;"""
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mvTestDup", result_row[0][1])
+    assertEquals("150000000", result_row[0][2])
+    sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats 
('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 
'min_value'='1', 'max_value'='10');"""
+    result_row = sql """show index stats mvTestDup mv1;"""
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mv1", result_row[0][1])
+    assertEquals("3443", result_row[0][2])
+    sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` 
set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 
'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
+    result_row = sql """show index stats mvTestDup mv3;"""
+    assertEquals("mvTestDup", result_row[0][0])
+    assertEquals("mv3", result_row[0][1])
+    assertEquals("234234", result_row[0][2])
+
     sql """drop stats mvTestDup"""
     result_sample = sql """show column stats mvTestDup"""
     assertEquals(0, result_sample.size())


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to