This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a7d99e2f0 [Improvement](statistics) Support for collecting statistics at the granularity of partitions. (#17966)
5a7d99e2f0 is described below

commit 5a7d99e2f02d1114ae35db9a45ebf62e6c7d8e14
Author: ElvinWei <[email protected]>
AuthorDate: Thu Mar 23 09:05:42 2023 +0800

    [Improvement](statistics) Support for collecting statistics at the granularity of partitions. (#17966)
    
    * Support for collecting statistics at the granularity of partitions
    
    * Add UT and fix some bugs
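    
    For illustration, after this change the parser accepts partition-scoped
    statements of the following form (the table, column, and partition names
    below are hypothetical):
    
        ANALYZE TABLE example_tbl (col1) PARTITION (p1, p2);
        ANALYZE TABLE example_tbl UPDATE HISTOGRAM ON col1 PARTITION (p1);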
---
 fe/fe-core/src/main/cup/sql_parser.cup             |  10 +-
 .../org/apache/doris/analysis/AnalyzeStmt.java     | 220 ++++++++++-----------
 .../apache/doris/statistics/AnalysisManager.java   |  12 +-
 .../apache/doris/statistics/AnalysisTaskInfo.java  |  15 +-
 .../doris/statistics/AnalysisTaskInfoBuilder.java  |  12 +-
 .../org/apache/doris/statistics/HistogramTask.java |  32 ++-
 .../apache/doris/statistics/MVAnalysisTask.java    |  14 +-
 .../apache/doris/statistics/OlapAnalysisTask.java  |   4 +-
 .../apache/doris/statistics/AnalysisJobTest.java   |   2 +
 .../doris/statistics/AnalysisTaskExecutorTest.java |   2 +
 .../apache/doris/statistics/HistogramTaskTest.java | 131 ++++++++++++
 11 files changed, 307 insertions(+), 147 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 03f4b2cec6..3936c9c56a 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -2789,11 +2789,15 @@ show_create_load_stmt ::=
 analyze_stmt ::=
     KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
     {:
-        RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties);
+        boolean is_whole_tbl = (cols == null);
+        boolean is_histogram = false;
+        RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram);
     :}
-    | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_properties:properties
+    | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_partition_names:partitionNames opt_properties:properties
     {:
-        RESULT = new AnalyzeStmt(tbl, cols, properties);
+        boolean is_whole_tbl = (cols == null);
+        boolean is_histogram = true;
+        RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram);
     :}
     ;
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index cbdebcff20..73dfb14dcc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -36,11 +36,9 @@ import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 
-import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import org.apache.commons.collections.CollectionUtils;
+import com.google.common.collect.Sets;
 import org.apache.commons.lang.StringUtils;
 
 import java.util.List;
@@ -63,48 +61,36 @@ public class AnalyzeStmt extends DdlStmt {
     // time to wait for collect  statistics
     public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec";
 
-    public boolean isHistogram = false;
-
     private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
             .add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
             .build();
 
     private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L;
 
-    public final boolean wholeTbl;
+    public boolean isWholeTbl;
+    public boolean isHistogram;
 
     private final TableName tableName;
-
-    private TableIf table;
-
-    private PartitionNames optPartitionNames;
-    private List<String> optColumnNames;
-    private Map<String, String> optProperties;
+    private final PartitionNames partitionNames;
+    private final List<String> columnNames;
+    private final Map<String, String> properties;
 
     // after analyzed
     private long dbId;
-
-    private final List<String> partitionNames = Lists.newArrayList();
-
-    public AnalyzeStmt(TableName tableName,
-            List<String> optColumnNames,
-            Map<String, String> optProperties) {
-        this.tableName = tableName;
-        this.optColumnNames = optColumnNames;
-        wholeTbl = CollectionUtils.isEmpty(optColumnNames);
-        isHistogram = true;
-        this.optProperties = optProperties;
-    }
+    private TableIf table;
 
     public AnalyzeStmt(TableName tableName,
-            List<String> optColumnNames,
-            PartitionNames optPartitionNames,
-            Map<String, String> optProperties) {
+                       List<String> columnNames,
+                       PartitionNames partitionNames,
+                       Map<String, String> properties,
+                       Boolean isWholeTbl,
+                       Boolean isHistogram) {
         this.tableName = tableName;
-        this.optColumnNames = optColumnNames;
-        this.optPartitionNames = optPartitionNames;
-        wholeTbl = CollectionUtils.isEmpty(optColumnNames);
-        this.optProperties = optProperties;
+        this.columnNames = columnNames;
+        this.partitionNames = partitionNames;
+        this.properties = properties;
+        this.isWholeTbl = isWholeTbl;
+        this.isHistogram = isHistogram;
     }
 
     @Override
@@ -116,18 +102,20 @@ public class AnalyzeStmt extends DdlStmt {
         String catalogName = tableName.getCtl();
         String dbName = tableName.getDb();
         String tblName = tableName.getTbl();
-        CatalogIf catalog = analyzer.getEnv().getCatalogMgr().getCatalogOrAnalysisException(catalogName);
+        CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
+                .getCatalogOrAnalysisException(catalogName);
         DatabaseIf db = catalog.getDbOrAnalysisException(dbName);
+        dbId = db.getId();
         table = db.getTableOrAnalysisException(tblName);
 
         checkAnalyzePriv(dbName, tblName);
 
-        if (optColumnNames != null && !optColumnNames.isEmpty()) {
+        if (columnNames != null && !columnNames.isEmpty()) {
             table.readLock();
             try {
                 List<String> baseSchema = table.getBaseSchema(false)
                         .stream().map(Column::getName).collect(Collectors.toList());
-                Optional<String> optional = optColumnNames.stream()
+                Optional<String> optional = columnNames.stream()
                         .filter(entity -> !baseSchema.contains(entity)).findFirst();
                 if (optional.isPresent()) {
                     String columnName = optional.get();
@@ -137,14 +125,10 @@ public class AnalyzeStmt extends DdlStmt {
             } finally {
                 table.readUnlock();
             }
-        } else {
-            optColumnNames = table.getBaseSchema(false)
-                    .stream().map(Column::getName).collect(Collectors.toList());
         }
-        dbId = db.getId();
-        // step2: analyze partition
+
         checkPartitionNames();
-        // step3: analyze properties
+
         checkProperties();
     }
 
@@ -166,46 +150,81 @@ public class AnalyzeStmt extends DdlStmt {
     }
 
     private void checkPartitionNames() throws AnalysisException {
-        if (optPartitionNames != null) {
-            optPartitionNames.analyze(analyzer);
-            if (tableName != null) {
-                Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
-                OlapTable olapTable = (OlapTable) db.getTableOrAnalysisException(tableName.getTbl());
-                if (!olapTable.isPartitioned()) {
-                    throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
-                }
-                List<String> names = optPartitionNames.getPartitionNames();
-                Set<String> olapPartitionNames = olapTable.getPartitionNames();
-                List<String> tempPartitionNames = olapTable.getTempPartitions().stream()
-                        .map(Partition::getName).collect(Collectors.toList());
-                Optional<String> optional = names.stream()
-                        .filter(name -> (tempPartitionNames.contains(name)
-                                || !olapPartitionNames.contains(name)))
-                        .findFirst();
-                if (optional.isPresent()) {
-                    throw new AnalysisException("Temporary partition or partition does not exist");
-                }
-            } else {
-                throw new AnalysisException("Specify partition should specify table name as well");
+        if (partitionNames != null) {
+            partitionNames.analyze(analyzer);
+            Database db = analyzer.getEnv().getInternalCatalog()
+                    .getDbOrAnalysisException(tableName.getDb());
+            OlapTable olapTable = (OlapTable) db.getTableOrAnalysisException(tableName.getTbl());
+            if (!olapTable.isPartitioned()) {
+                throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
+            }
+            List<String> names = partitionNames.getPartitionNames();
+            Set<String> olapPartitionNames = olapTable.getPartitionNames();
+            List<String> tempPartitionNames = olapTable.getTempPartitions().stream()
+                    .map(Partition::getName).collect(Collectors.toList());
+            Optional<String> illegalPartitionName = names.stream()
+                    .filter(name -> (tempPartitionNames.contains(name)
+                            || !olapPartitionNames.contains(name)))
+                    .findFirst();
+            if (illegalPartitionName.isPresent()) {
+                throw new AnalysisException("Temporary partition or partition does not exist");
             }
-            partitionNames.addAll(optPartitionNames.getPartitionNames());
         }
     }
 
     private void checkProperties() throws UserException {
-        if (optProperties == null) {
-            optProperties = Maps.newHashMap();
-        } else {
-            Optional<String> optional = optProperties.keySet().stream().filter(
+        if (properties != null) {
+            Optional<String> optional = properties.keySet().stream().filter(
                     entity -> !PROPERTIES_SET.contains(entity)).findFirst();
             if (optional.isPresent()) {
                 throw new AnalysisException(optional.get() + " is invalid property");
             }
+
+            long taskTimeout = ((Long) Util.getLongPropertyOrDefault(
+                    properties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
+                    Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC,
+                    CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue();
+            properties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
         }
-        long taskTimeout = ((Long) Util.getLongPropertyOrDefault(optProperties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
-                Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC,
-                CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue();
-        optProperties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
+    }
+
+    public String getCatalogName() {
+        return tableName.getCtl();
+    }
+
+    public long getDbId() {
+        return dbId;
+    }
+
+    public String getDBName() {
+        return tableName.getDb();
+    }
+
+    public Database getDb() throws AnalysisException {
+        return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
+    }
+
+    public TableIf getTable() {
+        return table;
+    }
+
+    public TableName getTblName() {
+        return tableName;
+    }
+
+    public Set<String> getColumnNames() {
+        return columnNames == null ? table.getBaseSchema(false)
+                .stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames);
+    }
+
+    public Set<String> getPartitionNames() {
+        return partitionNames == null ? Sets.newHashSet(table.getPartitionNames())
+                : Sets.newHashSet(partitionNames.getPartitionNames());
+    }
+
+    public Map<String, String> getProperties() {
+        // TODO add default properties
+        return properties != null ? properties : Maps.newHashMap();
     }
 
     @Override
@@ -218,21 +237,26 @@ public class AnalyzeStmt extends DdlStmt {
             sb.append(tableName.toSql());
         }
 
-        if (optColumnNames != null) {
+        if (isHistogram) {
+            sb.append(" ");
+            sb.append("UPDATE HISTOGRAM ON");
+            sb.append(" ");
+            sb.append(StringUtils.join(columnNames, ","));
+        } else if (columnNames != null) {
             sb.append("(");
-            sb.append(StringUtils.join(optColumnNames, ","));
+            sb.append(StringUtils.join(columnNames, ","));
             sb.append(")");
         }
 
-        if (optPartitionNames != null) {
+        if (partitionNames != null) {
             sb.append(" ");
-            sb.append(optPartitionNames.toSql());
+            sb.append(partitionNames.toSql());
         }
 
-        if (optProperties != null) {
+        if (properties != null) {
             sb.append(" ");
             sb.append("PROPERTIES(");
-            sb.append(new PrintableMap<>(optProperties, " = ",
+            sb.append(new PrintableMap<>(properties, " = ",
                     true,
                     false));
             sb.append(")");
@@ -240,48 +264,4 @@ public class AnalyzeStmt extends DdlStmt {
 
         return sb.toString();
     }
-
-    public String getCatalogName() {
-        return tableName.getCtl();
-    }
-
-    public String getDBName() {
-        return tableName.getDb();
-    }
-
-    public TableName getTblName() {
-        return tableName;
-    }
-
-    public List<String> getOptColumnNames() {
-        return optColumnNames;
-    }
-
-
-    public long getDbId() {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The dbId must be obtained after the parsing is complete");
-        return dbId;
-    }
-
-    public Database getDb() throws AnalysisException {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The db must be obtained after the parsing is complete");
-        return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
-    }
-
-    public TableIf getTable() {
-        return table;
-    }
-
-    public List<String> getPartitionNames() {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The partitionNames must be obtained after the parsing is 
complete");
-        return partitionNames;
-    }
-
-    public Map<String, String> getProperties() {
-        return optProperties;
-    }
-
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index a8f95d9cd4..bc8e944a52 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -45,6 +45,7 @@ import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -89,7 +90,8 @@ public class AnalysisManager {
         String db = analyzeStmt.getDBName();
         TableName tbl = analyzeStmt.getTblName();
         StatisticsUtil.convertTableNameToObjects(tbl);
-        List<String> colNames = analyzeStmt.getOptColumnNames();
+        Set<String> colNames = analyzeStmt.getColumnNames();
+        Set<String> partitionNames = analyzeStmt.getPartitionNames();
         Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();
         long jobId = Env.getCurrentEnv().getNextId();
         if (colNames != null) {
@@ -98,7 +100,8 @@ public class AnalysisManager {
                 AnalysisType analType = analyzeStmt.isHistogram ? AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
                 AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId)
                         .setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
-                        .setTblName(tbl.getTbl()).setColName(colName).setJobType(JobType.MANUAL)
+                        .setTblName(tbl.getTbl()).setColName(colName)
+                        .setPartitionNames(partitionNames).setJobType(JobType.MANUAL)
                         .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
                         .setState(AnalysisState.PENDING)
                         .setScheduleType(ScheduleType.ONCE).build();
@@ -110,7 +113,7 @@ public class AnalysisManager {
                 analysisTaskInfos.put(taskId, analysisTaskInfo);
             }
         }
-        if (analyzeStmt.wholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP)) {
+        if (analyzeStmt.isWholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP)) {
             OlapTable olapTable = (OlapTable) analyzeStmt.getTable();
             try {
                 olapTable.readLock();
@@ -122,7 +125,8 @@ public class AnalysisManager {
                     AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(
                                     jobId).setTaskId(taskId)
                             .setCatalogName(catalogName).setDbName(db)
-                            .setTblName(tbl.getTbl()).setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
+                            .setTblName(tbl.getTbl()).setPartitionNames(partitionNames)
+                            .setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
                             .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
                             .setScheduleType(ScheduleType.ONCE).build();
                     try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
index b0b1c370cb..8c9acac24f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
@@ -20,6 +20,7 @@ package org.apache.doris.statistics;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.Set;
 import java.util.StringJoiner;
 
 public class AnalysisTaskInfo {
@@ -62,6 +63,8 @@ public class AnalysisTaskInfo {
 
     public final String colName;
 
+    public final Set<String> partitionNames;
+
     public final Long indexId;
 
     public final JobType jobType;
@@ -84,15 +87,16 @@ public class AnalysisTaskInfo {
     public final ScheduleType scheduleType;
 
     public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbName, String tblName,
-            String colName, Long indexId, JobType jobType,
-            AnalysisMethod analysisMethod, AnalysisType analysisType, String message, int lastExecTimeInMs,
-            AnalysisState state, ScheduleType scheduleType) {
+            String colName, Set<String> partitionNames, Long indexId, JobType jobType,
+            AnalysisMethod analysisMethod, AnalysisType analysisType, String message,
+            int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) {
         this.jobId = jobId;
         this.taskId = taskId;
         this.catalogName = catalogName;
         this.dbName = dbName;
         this.tblName = tblName;
         this.colName = colName;
+        this.partitionNames = partitionNames;
         this.indexId = indexId;
         this.jobType = jobType;
         this.analysisMethod = analysisMethod;
@@ -106,15 +110,16 @@ public class AnalysisTaskInfo {
     @Override
     public String toString() {
         StringJoiner sj = new StringJoiner("\n", getClass().getName() + ":\n", "\n");
-        sj.add("JobId: " + String.valueOf(jobId));
+        sj.add("JobId: " + jobId);
         sj.add("CatalogName: " + catalogName);
         sj.add("DBName: " + dbName);
         sj.add("TableName: " + tblName);
         sj.add("ColumnName: " + colName);
+        sj.add("PartitionNames: " + partitionNames);
         sj.add("TaskType: " + analysisType.toString());
         sj.add("TaskMethod: " + analysisMethod.toString());
         sj.add("Message: " + message);
-        sj.add("LastExecTime: " + String.valueOf(lastExecTimeInMs));
+        sj.add("LastExecTime: " + lastExecTimeInMs);
         sj.add("CurrentState: " + state.toString());
         return sj.toString();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
index cc3b7b62f1..e687d6fe93 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
@@ -22,6 +22,8 @@ import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
 import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
 import org.apache.doris.statistics.AnalysisTaskInfo.ScheduleType;
 
+import java.util.Set;
+
 public class AnalysisTaskInfoBuilder {
     private long jobId;
     private long taskId;
@@ -29,6 +31,7 @@ public class AnalysisTaskInfoBuilder {
     private String dbName;
     private String tblName;
     private String colName;
+    private Set<String> partitionNames;
     private Long indexId;
     private JobType jobType;
     private AnalysisMethod analysisMethod;
@@ -68,6 +71,11 @@ public class AnalysisTaskInfoBuilder {
         return this;
     }
 
+    public AnalysisTaskInfoBuilder setPartitionNames(Set<String> partitionNames) {
+        this.partitionNames = partitionNames;
+        return this;
+    }
+
     public AnalysisTaskInfoBuilder setIndexId(Long indexId) {
         this.indexId = indexId;
         return this;
@@ -109,7 +117,7 @@ public class AnalysisTaskInfoBuilder {
     }
 
     public AnalysisTaskInfo build() {
-        return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName, indexId, jobType,
-                analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType);
+        return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName, partitionNames,
+                indexId, jobType, analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType);
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
index a58696c542..73cb2d1c3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
@@ -28,14 +28,15 @@ import org.apache.commons.text.StringSubstitutor;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 /**
  * Each task analyze one column.
  */
 public class HistogramTask extends BaseAnalysisTask {
 
-    /** To avoid too much data, use the following efficient sampling method */
-    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE = "INSERT INTO "
+    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE = "INSERT INTO "
             + "${internalDB}.${histogramStatTbl} "
             + "SELECT "
             + "    CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
@@ -48,7 +49,10 @@ public class HistogramTask extends BaseAnalysisTask {
             + "    HISTOGRAM(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
             + "    NOW() AS create_time "
             + "FROM "
-            + "    `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue} 
PERCENT)";
+            + "    `${dbName}`.`${tblName}`";
+
+    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART = ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE
+            + "    PARTITION (${partName})";
 
     @VisibleForTesting
     public HistogramTask() {
@@ -76,8 +80,26 @@ public class HistogramTask extends BaseAnalysisTask {
         params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
         params.put("percentValue", String.valueOf((int) (info.sampleRate * 
100)));
 
-        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
-        String histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE);
+        String histogramSql;
+        Set<String> partitionNames = info.partitionNames;
+
+        if (partitionNames.isEmpty()) {
+            StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+            histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE);
+        } else {
+            try {
+                tbl.readLock();
+                String partNames = info.partitionNames.stream()
+                        .filter(x -> tbl.getPartition(x) != null)
+                        .collect(Collectors.joining(","));
+                params.put("partName", partNames);
+                StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+                histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART);
+            } finally {
+                tbl.readUnlock();
+            }
+        }
+
         LOG.info("SQL to collect the histogram:\n {}", histogramSql);
 
         try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
index 5c66fd062b..c91398a0a6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
@@ -31,7 +31,6 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MaterializedIndexMeta;
 import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Partition;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.SqlParserUtils;
 import org.apache.doris.statistics.util.StatisticsUtil;
@@ -39,7 +38,7 @@ import org.apache.doris.statistics.util.StatisticsUtil;
 import com.google.common.base.Preconditions;
 
 import java.io.StringReader;
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -98,9 +97,9 @@ public class MVAnalysisTask extends BaseAnalysisTask {
                     .get();
             selectItem.setAlias(column.getName());
             Map<String, String> params = new HashMap<>();
-            for (Partition part : olapTable.getAllPartitions()) {
-                String partName = part.getName();
-                PartitionNames partitionName = new PartitionNames(false, Arrays.asList(partName));
+            for (String partName : info.partitionNames) {
+                PartitionNames partitionName = new PartitionNames(false,
+                        Collections.singletonList(partName));
                 tableRef.setPartitionNames(partitionName);
                 String sql = selectOne.toSql();
                 params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
@@ -111,8 +110,9 @@ public class MVAnalysisTask extends BaseAnalysisTask {
                 params.put("idxId", String.valueOf(meta.getIndexId()));
                 String colName = column.getName();
                 params.put("colId", colName);
-                long partId = part.getId();
-                params.put("partId", String.valueOf(partId));
+                String partId = olapTable.getPartition(partName) == null ? "NULL" :
+                        String.valueOf(olapTable.getPartition(partName).getId());
+                params.put("partId", partId);
                 params.put("dataSizeFunction", getDataSizeFunction(column));
                 params.put("dbName", info.dbName);
                 params.put("colName", colName);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 97cd92c29c..a8ec452d95 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -42,6 +42,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             + "FROM `${dbName}`.`${tblName}` "
             + "PARTITION ${partName}";
 
+    // TODO Currently, NDV is computed for the full table; in fact,
+    //  NDV should only be computed for the relevant partition.
     private static final String ANALYZE_COLUMN_SQL_TEMPLATE = INSERT_COL_STATISTICS
             + "     (SELECT NDV(`${colName}`) AS ndv "
             + "     FROM `${dbName}`.`${tblName}`) t2\n";
@@ -71,7 +73,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         List<String> partitionAnalysisSQLs = new ArrayList<>();
         try {
             tbl.readLock();
-            Set<String> partNames = tbl.getPartitionNames();
+            Set<String> partNames = info.partitionNames;
             for (String partName : partNames) {
                 Partition part = tbl.getPartition(partName);
                 if (part == null) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
index 2cb428e402..8efe587cad 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
@@ -28,6 +28,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.utframe.TestWithFeService;
 
+import com.google.common.collect.Sets;
 import mockit.Expectations;
 import mockit.Mock;
 import mockit.MockUp;
@@ -110,6 +111,7 @@ public class AnalysisJobTest extends TestWithFeService {
                 .setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1")
                 .setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(
                         AnalysisType.COLUMN)
+                .setPartitionNames(Sets.newHashSet("t1"))
                 .build();
         new OlapAnalysisTask(scheduler, analysisJobInfo).execute();
     }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
index 8dfcff429b..0b947133e4 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
@@ -26,6 +26,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
 import org.apache.doris.statistics.util.BlockingCounter;
 import org.apache.doris.utframe.TestWithFeService;
 
+import com.google.common.collect.Sets;
 import mockit.Expectations;
 import mockit.Mock;
 import mockit.MockUp;
@@ -95,6 +96,7 @@ public class AnalysisTaskExecutorTest extends TestWithFeService {
                 .setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1")
                 .setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(
                         AnalysisType.COLUMN)
+                .setPartitionNames(Sets.newHashSet("t1"))
                 .build();
         OlapAnalysisTask task = new OlapAnalysisTask(analysisTaskScheduler, analysisTaskInfo);
         new MockUp<AnalysisTaskScheduler>() {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
new file mode 100644
index 0000000000..5026ef3353
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.jmockit.Deencapsulation;
+import org.apache.doris.qe.StmtExecutor;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
+import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
+import org.apache.doris.system.SystemInfoService;
+import org.apache.doris.utframe.TestWithFeService;
+
+import com.google.common.collect.Sets;
+import mockit.Expectations;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.Mocked;
+import mockit.Tested;
+import org.junit.FixMethodOrder;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.runners.MethodSorters;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentMap;
+
+@FixMethodOrder(value = MethodSorters.NAME_ASCENDING)
+public class HistogramTaskTest extends TestWithFeService {
+
+    @Mocked
+    AnalysisTaskScheduler analysisTaskScheduler;
+
+    @Override
+    protected void runBeforeAll() throws Exception {
+        createDatabase("histogram_task_test");
+        connectContext.setDatabase(SystemInfoService.DEFAULT_CLUSTER + ":" + "histogram_task_test");
+        createTable(
+                "CREATE TABLE t1 (\n"
+                        + "    col1 date not null, \n"
+                        + "    col2 int not null, \n"
+                        + "    col3 int not null\n"
+                        + ")\n"
+                        + "PARTITION BY LIST(`col1`)\n"
+                        + "(\n"
+                        + "    PARTITION `p_201701` VALUES IN 
(\"2017-10-01\"),\n"
+                        + "    PARTITION `default`\n"
+                        + ")\n"
+                        + "DISTRIBUTED BY HASH(col3)\n"
+                        + "BUCKETS 1\n"
+                        + "PROPERTIES(\n"
+                        + "    \"replication_num\"=\"1\"\n"
+                        + ")");
+    }
+
+    @Tested
+
+    @Test
+    public void test1TaskCreation() throws Exception {
+
+        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
+        StmtExecutor executor = getSqlStmtExecutor(
+                "ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION (p_201701)");
+        Assertions.assertNotNull(executor);
+
+        ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> taskMap =
+                Deencapsulation.getField(analysisManager, "analysisJobIdToTaskMap");
+        Assertions.assertEquals(1, taskMap.size());
+
+        for (Entry<Long, Map<Long, AnalysisTaskInfo>> infoMap : taskMap.entrySet()) {
+            Map<Long, AnalysisTaskInfo> taskInfo = infoMap.getValue();
+            Assertions.assertEquals(1, taskInfo.size());
+
+            for (Entry<Long, AnalysisTaskInfo> infoEntry : taskInfo.entrySet()) {
+                AnalysisTaskInfo info = infoEntry.getValue();
+                Assertions.assertEquals(AnalysisType.HISTOGRAM, info.analysisType);
+                Assertions.assertEquals("t1", info.tblName);
+                Assertions.assertEquals("col1", info.colName);
+                Assertions.assertEquals("p_201701", 
info.partitionNames.iterator().next());
+            }
+        }
+    }
+
+    @Test
+    public void test2TaskExecution() throws Exception {
+        AnalysisTaskExecutor analysisTaskExecutor = new AnalysisTaskExecutor(analysisTaskScheduler);
+        AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder()
+                .setJobId(0).setTaskId(0).setCatalogName("internal")
+                .setDbName(SystemInfoService.DEFAULT_CLUSTER + ":" + "histogram_task_test").setTblName("t1")
+                .setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL)
+                .setAnalysisType(AnalysisType.HISTOGRAM)
+                 .setPartitionNames(Sets.newHashSet("t"))
+                .build();
+        HistogramTask task = new HistogramTask(analysisTaskScheduler, analysisTaskInfo);
+
+        new MockUp<AnalysisTaskScheduler>() {
+            @Mock
+            public synchronized BaseAnalysisTask getPendingTasks() {
+                return task;
+            }
+        };
+        new MockUp<AnalysisManager>() {
+            @Mock
+            public void updateTaskStatus(AnalysisTaskInfo info, AnalysisState jobState, String message, long time) {}
+        };
+        new Expectations() {
+            {
+                task.execute();
+                times = 1;
+            }
+        };
+
+        Deencapsulation.invoke(analysisTaskExecutor, "doFetchAndExecute");
+    }
+}

