This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5a7d99e2f0 [Improvement](statistics) Support for collecting statistics at the granularity of partitions. (#17966)
5a7d99e2f0 is described below
commit 5a7d99e2f02d1114ae35db9a45ebf62e6c7d8e14
Author: ElvinWei <[email protected]>
AuthorDate: Thu Mar 23 09:05:42 2023 +0800
[Improvement](statistics) Support for collecting statistics at the granularity of partitions. (#17966)
* Support for collecting statistics at the granularity of partitions
* Add UT and fix some bugs
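Example of the extended syntax (illustrative table, column, and partition names; the histogram form matches the statement exercised in the new HistogramTaskTest):

    ANALYZE TABLE t1(col1) PARTITION (p_201701);
    ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION (p_201701);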
---
fe/fe-core/src/main/cup/sql_parser.cup | 10 +-
.../org/apache/doris/analysis/AnalyzeStmt.java | 220 ++++++++++-----------
.../apache/doris/statistics/AnalysisManager.java | 12 +-
.../apache/doris/statistics/AnalysisTaskInfo.java | 15 +-
.../doris/statistics/AnalysisTaskInfoBuilder.java | 12 +-
.../org/apache/doris/statistics/HistogramTask.java | 32 ++-
.../apache/doris/statistics/MVAnalysisTask.java | 14 +-
.../apache/doris/statistics/OlapAnalysisTask.java | 4 +-
.../apache/doris/statistics/AnalysisJobTest.java | 2 +
.../doris/statistics/AnalysisTaskExecutorTest.java | 2 +
.../apache/doris/statistics/HistogramTaskTest.java | 131 ++++++++++++
11 files changed, 307 insertions(+), 147 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 03f4b2cec6..3936c9c56a 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -2789,11 +2789,15 @@ show_create_load_stmt ::=
analyze_stmt ::=
KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
{:
- RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties);
+ boolean is_whole_tbl = (cols == null);
+ boolean is_histogram = false;
+ RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram);
:}
- | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_properties:properties
+ | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_partition_names:partitionNames opt_properties:properties
{:
- RESULT = new AnalyzeStmt(tbl, cols, properties);
+ boolean is_whole_tbl = (cols == null);
+ boolean is_histogram = true;
+ RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram);
:}
;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index cbdebcff20..73dfb14dcc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -36,11 +36,9 @@ import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
-import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import org.apache.commons.collections.CollectionUtils;
+import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import java.util.List;
@@ -63,48 +61,36 @@ public class AnalyzeStmt extends DdlStmt {
// time to wait for collect statistics
public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec";
- public boolean isHistogram = false;
-
private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
.add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
.build();
private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L;
- public final boolean wholeTbl;
+ public boolean isWholeTbl;
+ public boolean isHistogram;
private final TableName tableName;
-
- private TableIf table;
-
- private PartitionNames optPartitionNames;
- private List<String> optColumnNames;
- private Map<String, String> optProperties;
+ private final PartitionNames partitionNames;
+ private final List<String> columnNames;
+ private final Map<String, String> properties;
// after analyzed
private long dbId;
-
- private final List<String> partitionNames = Lists.newArrayList();
-
- public AnalyzeStmt(TableName tableName,
- List<String> optColumnNames,
- Map<String, String> optProperties) {
- this.tableName = tableName;
- this.optColumnNames = optColumnNames;
- wholeTbl = CollectionUtils.isEmpty(optColumnNames);
- isHistogram = true;
- this.optProperties = optProperties;
- }
+ private TableIf table;
public AnalyzeStmt(TableName tableName,
- List<String> optColumnNames,
- PartitionNames optPartitionNames,
- Map<String, String> optProperties) {
+ List<String> columnNames,
+ PartitionNames partitionNames,
+ Map<String, String> properties,
+ Boolean isWholeTbl,
+ Boolean isHistogram) {
this.tableName = tableName;
- this.optColumnNames = optColumnNames;
- this.optPartitionNames = optPartitionNames;
- wholeTbl = CollectionUtils.isEmpty(optColumnNames);
- this.optProperties = optProperties;
+ this.columnNames = columnNames;
+ this.partitionNames = partitionNames;
+ this.properties = properties;
+ this.isWholeTbl = isWholeTbl;
+ this.isHistogram = isHistogram;
}
@Override
@@ -116,18 +102,20 @@ public class AnalyzeStmt extends DdlStmt {
String catalogName = tableName.getCtl();
String dbName = tableName.getDb();
String tblName = tableName.getTbl();
- CatalogIf catalog = analyzer.getEnv().getCatalogMgr().getCatalogOrAnalysisException(catalogName);
+ CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
+ .getCatalogOrAnalysisException(catalogName);
DatabaseIf db = catalog.getDbOrAnalysisException(dbName);
+ dbId = db.getId();
table = db.getTableOrAnalysisException(tblName);
checkAnalyzePriv(dbName, tblName);
- if (optColumnNames != null && !optColumnNames.isEmpty()) {
+ if (columnNames != null && !columnNames.isEmpty()) {
table.readLock();
try {
List<String> baseSchema = table.getBaseSchema(false)
.stream().map(Column::getName).collect(Collectors.toList());
- Optional<String> optional = optColumnNames.stream()
+ Optional<String> optional = columnNames.stream()
.filter(entity -> !baseSchema.contains(entity)).findFirst();
if (optional.isPresent()) {
String columnName = optional.get();
@@ -137,14 +125,10 @@ public class AnalyzeStmt extends DdlStmt {
} finally {
table.readUnlock();
}
- } else {
- optColumnNames = table.getBaseSchema(false)
- .stream().map(Column::getName).collect(Collectors.toList());
}
- dbId = db.getId();
- // step2: analyze partition
+
checkPartitionNames();
- // step3: analyze properties
+
checkProperties();
}
@@ -166,46 +150,81 @@ public class AnalyzeStmt extends DdlStmt {
}
private void checkPartitionNames() throws AnalysisException {
- if (optPartitionNames != null) {
- optPartitionNames.analyze(analyzer);
- if (tableName != null) {
- Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
- OlapTable olapTable = (OlapTable) db.getTableOrAnalysisException(tableName.getTbl());
- if (!olapTable.isPartitioned()) {
- throw new AnalysisException("Not a partitioned table: " +
olapTable.getName());
- }
- List<String> names = optPartitionNames.getPartitionNames();
- Set<String> olapPartitionNames = olapTable.getPartitionNames();
- List<String> tempPartitionNames = olapTable.getTempPartitions().stream()
- .map(Partition::getName).collect(Collectors.toList());
- Optional<String> optional = names.stream()
- .filter(name -> (tempPartitionNames.contains(name)
- || !olapPartitionNames.contains(name)))
- .findFirst();
- if (optional.isPresent()) {
- throw new AnalysisException("Temporary partition or
partition does not exist");
- }
- } else {
- throw new AnalysisException("Specify partition should specify
table name as well");
+ if (partitionNames != null) {
+ partitionNames.analyze(analyzer);
+ Database db = analyzer.getEnv().getInternalCatalog()
+ .getDbOrAnalysisException(tableName.getDb());
+ OlapTable olapTable = (OlapTable) db.getTableOrAnalysisException(tableName.getTbl());
+ if (!olapTable.isPartitioned()) {
+ throw new AnalysisException("Not a partitioned table: " +
olapTable.getName());
+ }
+ List<String> names = partitionNames.getPartitionNames();
+ Set<String> olapPartitionNames = olapTable.getPartitionNames();
+ List<String> tempPartitionNames = olapTable.getTempPartitions().stream()
+ .map(Partition::getName).collect(Collectors.toList());
+ Optional<String> illegalPartitionName = names.stream()
+ .filter(name -> (tempPartitionNames.contains(name)
+ || !olapPartitionNames.contains(name)))
+ .findFirst();
+ if (illegalPartitionName.isPresent()) {
+ throw new AnalysisException("Temporary partition or partition
does not exist");
}
- partitionNames.addAll(optPartitionNames.getPartitionNames());
}
}
private void checkProperties() throws UserException {
- if (optProperties == null) {
- optProperties = Maps.newHashMap();
- } else {
- Optional<String> optional = optProperties.keySet().stream().filter(
+ if (properties != null) {
+ Optional<String> optional = properties.keySet().stream().filter(
entity -> !PROPERTIES_SET.contains(entity)).findFirst();
if (optional.isPresent()) {
throw new AnalysisException(optional.get() + " is invalid property");
}
+
+ long taskTimeout = ((Long) Util.getLongPropertyOrDefault(
+ properties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
+ Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC,
+ CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue();
+ properties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
}
- long taskTimeout = ((Long) Util.getLongPropertyOrDefault(optProperties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
- Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC,
- CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue();
- optProperties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
+ }
+
+ public String getCatalogName() {
+ return tableName.getCtl();
+ }
+
+ public long getDbId() {
+ return dbId;
+ }
+
+ public String getDBName() {
+ return tableName.getDb();
+ }
+
+ public Database getDb() throws AnalysisException {
+ return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
+ }
+
+ public TableIf getTable() {
+ return table;
+ }
+
+ public TableName getTblName() {
+ return tableName;
+ }
+
+ public Set<String> getColumnNames() {
+ return columnNames == null ? table.getBaseSchema(false)
+ .stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames);
+ }
+
+ public Set<String> getPartitionNames() {
+ return partitionNames == null ? Sets.newHashSet(table.getPartitionNames())
+ : Sets.newHashSet(partitionNames.getPartitionNames());
+ }
+
+ public Map<String, String> getProperties() {
+ // TODO add default properties
+ return properties != null ? properties : Maps.newHashMap();
}
@Override
@@ -218,21 +237,26 @@ public class AnalyzeStmt extends DdlStmt {
sb.append(tableName.toSql());
}
- if (optColumnNames != null) {
+ if (isHistogram) {
+ sb.append(" ");
+ sb.append("UPDATE HISTOGRAM ON");
+ sb.append(" ");
+ sb.append(StringUtils.join(columnNames, ","));
+ } else if (columnNames != null) {
sb.append("(");
- sb.append(StringUtils.join(optColumnNames, ","));
+ sb.append(StringUtils.join(columnNames, ","));
sb.append(")");
}
- if (optPartitionNames != null) {
+ if (partitionNames != null) {
sb.append(" ");
- sb.append(optPartitionNames.toSql());
+ sb.append(partitionNames.toSql());
}
- if (optProperties != null) {
+ if (properties != null) {
sb.append(" ");
sb.append("PROPERTIES(");
- sb.append(new PrintableMap<>(optProperties, " = ",
+ sb.append(new PrintableMap<>(properties, " = ",
true,
false));
sb.append(")");
@@ -240,48 +264,4 @@ public class AnalyzeStmt extends DdlStmt {
return sb.toString();
}
-
- public String getCatalogName() {
- return tableName.getCtl();
- }
-
- public String getDBName() {
- return tableName.getDb();
- }
-
- public TableName getTblName() {
- return tableName;
- }
-
- public List<String> getOptColumnNames() {
- return optColumnNames;
- }
-
-
- public long getDbId() {
- Preconditions.checkArgument(isAnalyzed(),
- "The dbId must be obtained after the parsing is complete");
- return dbId;
- }
-
- public Database getDb() throws AnalysisException {
- Preconditions.checkArgument(isAnalyzed(),
- "The db must be obtained after the parsing is complete");
- return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
- }
-
- public TableIf getTable() {
- return table;
- }
-
- public List<String> getPartitionNames() {
- Preconditions.checkArgument(isAnalyzed(),
- "The partitionNames must be obtained after the parsing is
complete");
- return partitionNames;
- }
-
- public Map<String, String> getProperties() {
- return optProperties;
- }
-
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index a8f95d9cd4..bc8e944a52 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -45,6 +45,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -89,7 +90,8 @@ public class AnalysisManager {
String db = analyzeStmt.getDBName();
TableName tbl = analyzeStmt.getTblName();
StatisticsUtil.convertTableNameToObjects(tbl);
- List<String> colNames = analyzeStmt.getOptColumnNames();
+ Set<String> colNames = analyzeStmt.getColumnNames();
+ Set<String> partitionNames = analyzeStmt.getPartitionNames();
Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();
long jobId = Env.getCurrentEnv().getNextId();
if (colNames != null) {
@@ -98,7 +100,8 @@ public class AnalysisManager {
AnalysisType analType = analyzeStmt.isHistogram ? AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId)
.setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
- .setTblName(tbl.getTbl()).setColName(colName).setJobType(JobType.MANUAL)
+ .setTblName(tbl.getTbl()).setColName(colName)
+ .setPartitionNames(partitionNames).setJobType(JobType.MANUAL)
.setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
.setState(AnalysisState.PENDING)
.setScheduleType(ScheduleType.ONCE).build();
@@ -110,7 +113,7 @@ public class AnalysisManager {
analysisTaskInfos.put(taskId, analysisTaskInfo);
}
}
- if (analyzeStmt.wholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP)) {
+ if (analyzeStmt.isWholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP)) {
OlapTable olapTable = (OlapTable) analyzeStmt.getTable();
try {
olapTable.readLock();
@@ -122,7 +125,8 @@ public class AnalysisManager {
AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(
jobId).setTaskId(taskId)
.setCatalogName(catalogName).setDbName(db)
- .setTblName(tbl.getTbl()).setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
+ .setTblName(tbl.getTbl()).setPartitionNames(partitionNames)
+ .setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
.setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
.setScheduleType(ScheduleType.ONCE).build();
try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
index b0b1c370cb..8c9acac24f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
@@ -20,6 +20,7 @@ package org.apache.doris.statistics;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.util.Set;
import java.util.StringJoiner;
public class AnalysisTaskInfo {
@@ -62,6 +63,8 @@ public class AnalysisTaskInfo {
public final String colName;
+ public final Set<String> partitionNames;
+
public final Long indexId;
public final JobType jobType;
@@ -84,15 +87,16 @@ public class AnalysisTaskInfo {
public final ScheduleType scheduleType;
public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbName, String tblName,
- String colName, Long indexId, JobType jobType,
- AnalysisMethod analysisMethod, AnalysisType analysisType, String message, int lastExecTimeInMs,
- AnalysisState state, ScheduleType scheduleType) {
+ String colName, Set<String> partitionNames, Long indexId, JobType jobType,
+ AnalysisMethod analysisMethod, AnalysisType analysisType, String message,
+ int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) {
this.jobId = jobId;
this.taskId = taskId;
this.catalogName = catalogName;
this.dbName = dbName;
this.tblName = tblName;
this.colName = colName;
+ this.partitionNames = partitionNames;
this.indexId = indexId;
this.jobType = jobType;
this.analysisMethod = analysisMethod;
@@ -106,15 +110,16 @@ public class AnalysisTaskInfo {
@Override
public String toString() {
StringJoiner sj = new StringJoiner("\n", getClass().getName() + ":\n",
"\n");
- sj.add("JobId: " + String.valueOf(jobId));
+ sj.add("JobId: " + jobId);
sj.add("CatalogName: " + catalogName);
sj.add("DBName: " + dbName);
sj.add("TableName: " + tblName);
sj.add("ColumnName: " + colName);
+ sj.add("PartitionNames: " + partitionNames);
sj.add("TaskType: " + analysisType.toString());
sj.add("TaskMethod: " + analysisMethod.toString());
sj.add("Message: " + message);
- sj.add("LastExecTime: " + String.valueOf(lastExecTimeInMs));
+ sj.add("LastExecTime: " + lastExecTimeInMs);
sj.add("CurrentState: " + state.toString());
return sj.toString();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
index cc3b7b62f1..e687d6fe93 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
@@ -22,6 +22,8 @@ import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
import org.apache.doris.statistics.AnalysisTaskInfo.ScheduleType;
+import java.util.Set;
+
public class AnalysisTaskInfoBuilder {
private long jobId;
private long taskId;
@@ -29,6 +31,7 @@ public class AnalysisTaskInfoBuilder {
private String dbName;
private String tblName;
private String colName;
+ private Set<String> partitionNames;
private Long indexId;
private JobType jobType;
private AnalysisMethod analysisMethod;
@@ -68,6 +71,11 @@ public class AnalysisTaskInfoBuilder {
return this;
}
+ public AnalysisTaskInfoBuilder setPartitionNames(Set<String> partitionNames) {
+ this.partitionNames = partitionNames;
+ return this;
+ }
+
public AnalysisTaskInfoBuilder setIndexId(Long indexId) {
this.indexId = indexId;
return this;
@@ -109,7 +117,7 @@ public class AnalysisTaskInfoBuilder {
}
public AnalysisTaskInfo build() {
- return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName, indexId, jobType,
- analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType);
+ return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName, partitionNames,
+ indexId, jobType, analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
index a58696c542..73cb2d1c3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
@@ -28,14 +28,15 @@ import org.apache.commons.text.StringSubstitutor;
import java.util.HashMap;
import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
/**
* Each task analyze one column.
*/
public class HistogramTask extends BaseAnalysisTask {
- /** To avoid too much data, use the following efficient sampling method */
- private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE = "INSERT INTO "
+ private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE = "INSERT INTO "
+ "${internalDB}.${histogramStatTbl} "
+ "SELECT "
+ " CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
@@ -48,7 +49,10 @@ public class HistogramTask extends BaseAnalysisTask {
+ " HISTOGRAM(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
+ " NOW() AS create_time "
+ "FROM "
- + " `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue}
PERCENT)";
+ + " `${dbName}`.`${tblName}`";
+
+ private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART = ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE
+ + " PARTITION (${partName})";
@VisibleForTesting
public HistogramTask() {
@@ -76,8 +80,26 @@ public class HistogramTask extends BaseAnalysisTask {
params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
params.put("percentValue", String.valueOf((int) (info.sampleRate *
100)));
- StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
- String histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE);
+ String histogramSql;
+ Set<String> partitionNames = info.partitionNames;
+
+ if (partitionNames.isEmpty()) {
+ StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+ histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE);
+ } else {
+ try {
+ tbl.readLock();
+ String partNames = info.partitionNames.stream()
+ .filter(x -> tbl.getPartition(x) != null)
+ .collect(Collectors.joining(","));
+ params.put("partName", partNames);
+ StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+ histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART);
+ } finally {
+ tbl.readUnlock();
+ }
+ }
+
LOG.info("SQL to collect the histogram:\n {}", histogramSql);
try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
index 5c66fd062b..c91398a0a6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
@@ -31,7 +31,6 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndexMeta;
import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Partition;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.util.SqlParserUtils;
import org.apache.doris.statistics.util.StatisticsUtil;
@@ -39,7 +38,7 @@ import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import java.io.StringReader;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -98,9 +97,9 @@ public class MVAnalysisTask extends BaseAnalysisTask {
.get();
selectItem.setAlias(column.getName());
Map<String, String> params = new HashMap<>();
- for (Partition part : olapTable.getAllPartitions()) {
- String partName = part.getName();
- PartitionNames partitionName = new PartitionNames(false, Arrays.asList(partName));
+ for (String partName : info.partitionNames) {
+ PartitionNames partitionName = new PartitionNames(false,
+ Collections.singletonList(partName));
tableRef.setPartitionNames(partitionName);
String sql = selectOne.toSql();
params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
@@ -111,8 +110,9 @@ public class MVAnalysisTask extends BaseAnalysisTask {
params.put("idxId", String.valueOf(meta.getIndexId()));
String colName = column.getName();
params.put("colId", colName);
- long partId = part.getId();
- params.put("partId", String.valueOf(partId));
+ String partId = olapTable.getPartition(partName) == null ? "NULL" :
+ String.valueOf(olapTable.getPartition(partName).getId());
+ params.put("partId", partId);
params.put("dataSizeFunction", getDataSizeFunction(column));
params.put("dbName", info.dbName);
params.put("colName", colName);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 97cd92c29c..a8ec452d95 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -42,6 +42,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
+ "FROM `${dbName}`.`${tblName}` "
+ "PARTITION ${partName}";
+ // TODO Currently, NDV is computed for the full table; in fact,
+ // NDV should only be computed for the relevant partition.
private static final String ANALYZE_COLUMN_SQL_TEMPLATE = INSERT_COL_STATISTICS
+ " (SELECT NDV(`${colName}`) AS ndv "
+ " FROM `${dbName}`.`${tblName}`) t2\n";
@@ -71,7 +73,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
List<String> partitionAnalysisSQLs = new ArrayList<>();
try {
tbl.readLock();
- Set<String> partNames = tbl.getPartitionNames();
+ Set<String> partNames = info.partitionNames;
for (String partName : partNames) {
Partition part = tbl.getPartition(partName);
if (part == null) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
index 2cb428e402..8efe587cad 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
@@ -28,6 +28,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.doris.utframe.TestWithFeService;
+import com.google.common.collect.Sets;
import mockit.Expectations;
import mockit.Mock;
import mockit.MockUp;
@@ -110,6 +111,7 @@ public class AnalysisJobTest extends TestWithFeService {
.setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1")
.setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(
AnalysisType.COLUMN)
+ .setPartitionNames(Sets.newHashSet("t1"))
.build();
new OlapAnalysisTask(scheduler, analysisJobInfo).execute();
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
index 8dfcff429b..0b947133e4 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java
@@ -26,6 +26,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
import org.apache.doris.statistics.util.BlockingCounter;
import org.apache.doris.utframe.TestWithFeService;
+import com.google.common.collect.Sets;
import mockit.Expectations;
import mockit.Mock;
import mockit.MockUp;
@@ -95,6 +96,7 @@ public class AnalysisTaskExecutorTest extends TestWithFeService {
.setCatalogName("internal").setDbName("default_cluster:analysis_job_test").setTblName("t1")
.setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(
AnalysisType.COLUMN)
+ .setPartitionNames(Sets.newHashSet("t1"))
.build();
OlapAnalysisTask task = new OlapAnalysisTask(analysisTaskScheduler, analysisTaskInfo);
new MockUp<AnalysisTaskScheduler>() {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
new file mode 100644
index 0000000000..5026ef3353
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.jmockit.Deencapsulation;
+import org.apache.doris.qe.StmtExecutor;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
+import org.apache.doris.statistics.AnalysisTaskInfo.JobType;
+import org.apache.doris.system.SystemInfoService;
+import org.apache.doris.utframe.TestWithFeService;
+
+import com.google.common.collect.Sets;
+import mockit.Expectations;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.Mocked;
+import mockit.Tested;
+import org.junit.FixMethodOrder;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.runners.MethodSorters;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentMap;
+
+@FixMethodOrder(value = MethodSorters.NAME_ASCENDING)
+public class HistogramTaskTest extends TestWithFeService {
+
+ @Mocked
+ AnalysisTaskScheduler analysisTaskScheduler;
+
+ @Override
+ protected void runBeforeAll() throws Exception {
+ createDatabase("histogram_task_test");
+ connectContext.setDatabase(SystemInfoService.DEFAULT_CLUSTER + ":" + "histogram_task_test");
+ createTable(
+ "CREATE TABLE t1 (\n"
+ + " col1 date not null, \n"
+ + " col2 int not null, \n"
+ + " col3 int not null\n"
+ + ")\n"
+ + "PARTITION BY LIST(`col1`)\n"
+ + "(\n"
+ + " PARTITION `p_201701` VALUES IN
(\"2017-10-01\"),\n"
+ + " PARTITION `default`\n"
+ + ")\n"
+ + "DISTRIBUTED BY HASH(col3)\n"
+ + "BUCKETS 1\n"
+ + "PROPERTIES(\n"
+ + " \"replication_num\"=\"1\"\n"
+ + ")");
+ }
+
+ @Tested
+
+ @Test
+ public void test1TaskCreation() throws Exception {
+
+ AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
+ StmtExecutor executor = getSqlStmtExecutor(
+ "ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION
(p_201701)");
+ Assertions.assertNotNull(executor);
+
+ ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> taskMap =
+ Deencapsulation.getField(analysisManager, "analysisJobIdToTaskMap");
+ Assertions.assertEquals(1, taskMap.size());
+
+ for (Entry<Long, Map<Long, AnalysisTaskInfo>> infoMap : taskMap.entrySet()) {
+ Map<Long, AnalysisTaskInfo> taskInfo = infoMap.getValue();
+ Assertions.assertEquals(1, taskInfo.size());
+
+ for (Entry<Long, AnalysisTaskInfo> infoEntry : taskInfo.entrySet()) {
+ AnalysisTaskInfo info = infoEntry.getValue();
+ Assertions.assertEquals(AnalysisType.HISTOGRAM, info.analysisType);
+ Assertions.assertEquals("t1", info.tblName);
+ Assertions.assertEquals("col1", info.colName);
+ Assertions.assertEquals("p_201701",
info.partitionNames.iterator().next());
+ }
+ }
+ }
+
+ @Test
+ public void test2TaskExecution() throws Exception {
+ AnalysisTaskExecutor analysisTaskExecutor = new AnalysisTaskExecutor(analysisTaskScheduler);
+ AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder()
+ .setJobId(0).setTaskId(0).setCatalogName("internal")
+ .setDbName(SystemInfoService.DEFAULT_CLUSTER + ":" + "histogram_task_test").setTblName("t1")
+ .setColName("col1").setJobType(JobType.MANUAL).setAnalysisMethod(AnalysisMethod.FULL)
+ .setAnalysisType(AnalysisType.HISTOGRAM)
+ .setPartitionNames(Sets.newHashSet("t"))
+ .build();
+ HistogramTask task = new HistogramTask(analysisTaskScheduler, analysisTaskInfo);
+
+ new MockUp<AnalysisTaskScheduler>() {
+ @Mock
+ public synchronized BaseAnalysisTask getPendingTasks() {
+ return task;
+ }
+ };
+ new MockUp<AnalysisManager>() {
+ @Mock
+ public void updateTaskStatus(AnalysisTaskInfo info, AnalysisState jobState, String message, long time) {}
+ };
+ new Expectations() {
+ {
+ task.execute();
+ times = 1;
+ }
+ };
+
+ Deencapsulation.invoke(analysisTaskExecutor, "doFetchAndExecute");
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]