This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1a6401d682 [enhancement](statistics) support sampling collection of
statistics (#18880)
1a6401d682 is described below
commit 1a6401d682ed4aace882d2a3edcecd82e912b4da
Author: ElvinWei <[email protected]>
AuthorDate: Fri Apr 21 13:11:43 2023 +0800
[enhancement](statistics) support sampling collection of statistics
(#18880)
1. Supports sampling to collect statistics
2. Improved syntax for collecting statistics
3. Support histogram specifies the number of buckets
4. Tweaked some code structure
---
The syntax supports WITH and PROPERTIES, using the same syntax as before.
Column Statistics Collection Syntax:
```SQL
ANALYZE [ SYNC ] TABLE table_name
[ (column_name [, ...]) ]
[ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
[ PROPERTIES ('key' = 'value', ...) ];
```
Column histogram collection syntax:
```SQL
ANALYZE [ SYNC ] TABLE table_name
[ (column_name [, ...]) ]
UPDATE HISTOGRAM
[ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][
WITH BUCKETS ] ]
[ PROPERTIES ('key' = 'value', ...) ];
```
Explanation:
- sync: Collect statistics synchronously. Return after collecting.
- incremental: Collect statistics incrementally. Incremental collection of
histogram statistics is not supported.
- sample percent | rows: Collect statistics by sampling. Either a percentage
or a number of rows can be sampled.
- buckets: Specifies the maximum number of buckets generated when collecting
histogram statistics.
- table_name: The purpose table for collecting statistics. Can be of the
form `db_name.table_name`.
- column_name: The specified destination column must be a column that
exists in `table_name`, and multiple column names are separated by commas.
- properties: Properties used to set statistics tasks. Currently only the
following configurations are supported (equivalent to the WITH clause):
- 'sync' = 'true'
- 'incremental' = 'true'
- 'sample.percent' = '50'
- 'sample.rows' = '1000'
- 'num.buckets' = '10'
---
TODO:
- Supplement the complete p0 test
- `Incremental` statistics see #18653
---
fe/fe-core/src/main/cup/sql_parser.cup | 111 ++++++++--
.../org/apache/doris/analysis/AnalyzeStmt.java | 241 ++++++++++++++++-----
.../apache/doris/statistics/AnalysisManager.java | 114 ++++++----
.../apache/doris/statistics/AnalysisTaskInfo.java | 17 +-
.../doris/statistics/AnalysisTaskInfoBuilder.java | 44 +++-
.../apache/doris/statistics/BaseAnalysisTask.java | 12 +
.../org/apache/doris/statistics/HistogramTask.java | 55 ++---
.../apache/doris/statistics/MVAnalysisTask.java | 6 +-
.../apache/doris/statistics/OlapAnalysisTask.java | 6 +-
.../doris/statistics/StatisticConstants.java | 1 +
fe/fe-core/src/main/jflex/sql_scanner.flex | 1 +
.../apache/doris/statistics/HistogramTaskTest.java | 2 +-
.../data/statistics/show_stats_test.out | 7 +
.../suites/statistics/alter_col_stats.groovy | 2 +-
.../suites/statistics/analyze_test.groovy | 49 +++++
.../suites/statistics/show_stats_test.groovy | 78 +++++++
16 files changed, 572 insertions(+), 174 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 925f7b37b1..86c32c430c 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -540,6 +540,7 @@ terminal String
KW_ROW,
KW_ROWS,
KW_S3,
+ KW_SAMPLE,
KW_SCHEMA,
KW_SCHEMAS,
KW_SECOND,
@@ -890,6 +891,10 @@ nonterminal PauseSyncJobStmt pause_sync_job_stmt;
nonterminal StopSyncJobStmt stop_sync_job_stmt;
nonterminal JobName job_name;
+// analyze
+nonterminal Map<String, String> with_analysis_properties;
+nonterminal List<Map<String, String>> opt_with_analysis_properties;
+
nonterminal String opt_db, procedure_or_function, opt_comment, opt_engine;
nonterminal ColumnDef.DefaultValue opt_default_value;
nonterminal Boolean opt_if_exists, opt_if_not_exists;
@@ -2812,30 +2817,45 @@ show_create_reporitory_stmt ::=
// analyze statment
analyze_stmt ::=
- KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols
opt_properties:properties
- {:
- boolean is_whole_tbl = (cols == null);
- boolean is_histogram = false;
- boolean is_increment = false;
- RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl,
is_histogram, is_increment);
- :}
- | KW_ANALYZE opt_sync:sync KW_INCREMENTAL KW_TABLE table_name:tbl
opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
- {:
- boolean is_whole_tbl = (cols == null);
- boolean is_histogram = false;
- boolean is_increment = true;
- RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl,
is_histogram, is_increment);
- :}
- | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM
KW_ON ident_list:cols opt_partition_names:partitionNames
opt_properties:properties
+ // statistics
+ KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols
+ opt_with_analysis_properties:withAnalysisProperties
opt_properties:properties
{:
- boolean is_whole_tbl = false;
- boolean is_histogram = true;
- boolean is_increment = false;
- RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl,
is_histogram, is_increment);
+ if (properties == null) {
+ properties = Maps.newHashMap();
+ }
+ for (Map<String, String> property : withAnalysisProperties) {
+ properties.putAll(property);
+ }
+ if (!properties.containsKey("sync")) {
+ properties.put("sync", String.valueOf(sync));
+ }
+ // Rule: If no type is specified, see if there is a specified column
+ if (!properties.containsKey("analysis.type")) {
+ if ((cols == null)) {
+ properties.put("analysis.type", "INDEX");
+ } else {
+ properties.put("analysis.type", "COLUMN");
+ }
+ }
+ RESULT = new AnalyzeStmt(tbl, cols, properties);
:}
- | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM
+ // histogram
+ | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols
KW_UPDATE KW_HISTOGRAM
+ opt_with_analysis_properties:withAnalysisProperties
opt_properties:properties
{:
- RESULT = new AnalyzeStmt(tbl, sync, null, new HashMap<>(), true, true,
false);
+ if (properties == null) {
+ properties = Maps.newHashMap();
+ }
+ for (Map<String, String> property : withAnalysisProperties) {
+ properties.putAll(property);
+ }
+ if (!properties.containsKey("sync")) {
+ properties.put("sync", String.valueOf(sync));
+ }
+ // TODO: Support materialized view
+ properties.put("analysis.type", "HISTOGRAM");
+ RESULT = new AnalyzeStmt(tbl, cols, properties);
:}
;
@@ -5730,6 +5750,53 @@ ident_list ::=
:}
;
+with_analysis_properties ::=
+ KW_SYNC
+ {:
+ RESULT = new HashMap<String, String>() {{
+ put("sync", "true");
+ }};
+ :}
+ | KW_INCREMENTAL
+ {:
+ RESULT = new HashMap<String, String>() {{
+ put("incremental", "true");
+ }};
+ :}
+ | KW_SAMPLE KW_PERCENT INTEGER_LITERAL:samplePercent
+ {:
+ RESULT = new HashMap<String, String>() {{
+ put("sample.percent", String.valueOf(samplePercent.intValue()));
+ }};
+ :}
+ | KW_SAMPLE KW_ROWS INTEGER_LITERAL:sampleRows
+ {:
+ RESULT = new HashMap<String, String>() {{
+ put("sample.rows", String.valueOf(sampleRows.intValue()));
+ }};
+ :}
+ | KW_BUCKETS INTEGER_LITERAL:numBuckets
+ {:
+ RESULT = new HashMap<String, String>() {{
+ put("num.buckets", String.valueOf(numBuckets.intValue()));
+ }};
+ :}
+ ;
+
+opt_with_analysis_properties ::=
+ /* empty */
+ {:
+ RESULT = Lists.newArrayList();
+ :}
+ | opt_with_analysis_properties:withAnalysisProperties
+ KW_WITH with_analysis_properties:property
+ {:
+ withAnalysisProperties.add(property);
+ RESULT = withAnalysisProperties;
+ :}
+ ;
+
+
expr_list ::=
expr:e
{:
@@ -7368,6 +7435,8 @@ keyword ::=
{: RESULT = id; :}
| KW_EXPIRED: id
{: RESULT = id; :}
+ | KW_SAMPLE:id
+ {: RESULT = id; :}
;
// Identifier that contain keyword
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index d189a8aeee..093dfad776 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -21,22 +21,22 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PrintableMap;
-import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
@@ -44,35 +44,57 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
-import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
- * Collect statistics.
- *
- * syntax:
- * ANALYZE [[ db_name.tb_name ] [( column_name [, ...] )], ...] [
PROPERTIES(...) ]
- * db_name.tb_name: collect table and column statistics from tb_name
- * column_name: collect column statistics from column_name
- * properties: properties of statistics jobs
+ * Column Statistics Collection Syntax:
+ * ANALYZE [ SYNC ] TABLE table_name
+ * [ (column_name [, ...]) ]
+ * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
+ * [ PROPERTIES ('key' = 'value', ...) ];
+ *
+ * Column histogram collection syntax:
+ * ANALYZE [ SYNC ] TABLE table_name
+ * [ (column_name [, ...]) ]
+ * UPDATE HISTOGRAM
+ * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH
BUCKETS ] ]
+ * [ PROPERTIES ('key' = 'value', ...) ];
+ *
+ * Illustrate:
+ * - sync:Collect statistics synchronously. Return after collecting.
+ * - incremental:Collect statistics incrementally. Incremental collection of
histogram statistics is not supported.
+ * - sample percent | rows:Collect statistics by sampling. Scale and number
of rows can be sampled.
+ * - buckets:Specifies the maximum number of buckets generated when
collecting histogram statistics.
+ * - table_name: The purpose table for collecting statistics. Can be of the
form `db_name.table_name`.
+ * - column_name: The specified destination column must be a column that
exists in `table_name`,
+ * and multiple column names are separated by commas.
+ * - properties:Properties used to set statistics tasks. Currently only the
following configurations
+ * are supported (equivalent to the with statement)
+ * - 'sync' = 'true'
+ * - 'incremental' = 'true'
+ * - 'sample.percent' = '50'
+ * - 'sample.rows' = '1000'
+ * - 'num.buckets' = 10
*/
public class AnalyzeStmt extends DdlStmt {
- // time to wait for collect statistics
- public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC =
"cbo_statistics_task_timeout_sec";
+ // The properties passed in by the user through "with" or "properties('K',
'V')"
+ public static final String PROPERTY_SYNC = "sync";
+ public static final String PROPERTY_INCREMENTAL = "incremental";
+ public static final String PROPERTY_SAMPLE_PERCENT = "sample.percent";
+ public static final String PROPERTY_SAMPLE_ROWS = "sample.rows";
+ public static final String PROPERTY_NUM_BUCKETS = "num.buckets";
+ public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";
private static final ImmutableSet<String> PROPERTIES_SET = new
ImmutableSet.Builder<String>()
- .add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
+ .add(PROPERTY_SYNC)
+ .add(PROPERTY_INCREMENTAL)
+ .add(PROPERTY_SAMPLE_PERCENT)
+ .add(PROPERTY_SAMPLE_ROWS)
+ .add(PROPERTY_NUM_BUCKETS)
+ .add(PROPERTY_ANALYSIS_TYPE)
.build();
- private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v >
0L;
-
- public boolean isWholeTbl;
- public boolean isHistogram;
- public boolean isIncrement;
-
private final TableName tableName;
-
- private final boolean sync;
private final List<String> columnNames;
private final Map<String, String> properties;
@@ -81,19 +103,11 @@ public class AnalyzeStmt extends DdlStmt {
private TableIf table;
public AnalyzeStmt(TableName tableName,
- boolean sync,
- List<String> columnNames,
- Map<String, String> properties,
- Boolean isWholeTbl,
- Boolean isHistogram,
- Boolean isIncrement) {
+ List<String> columnNames,
+ Map<String, String> properties) {
this.tableName = tableName;
- this.sync = sync;
this.columnNames = columnNames;
this.properties = properties;
- this.isWholeTbl = isWholeTbl;
- this.isHistogram = isHistogram;
- this.isIncrement = isIncrement;
}
@Override
@@ -133,6 +147,15 @@ public class AnalyzeStmt extends DdlStmt {
}
checkProperties();
+
+ // TODO support external table
+ if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)
+ || properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+ if (!(table instanceof OlapTable)) {
+ throw new AnalysisException("Sampling statistics "
+ + "collection of external tables is not supported");
+ }
+ }
}
@Override
@@ -153,18 +176,88 @@ public class AnalyzeStmt extends DdlStmt {
}
private void checkProperties() throws UserException {
- if (properties != null) {
- Optional<String> optional = properties.keySet().stream().filter(
- entity -> !PROPERTIES_SET.contains(entity)).findFirst();
- if (optional.isPresent()) {
- throw new AnalysisException(optional.get() + " is invalid
property");
+ if (properties == null || properties.isEmpty()) {
+ throw new AnalysisException("analysis properties should not be
empty");
+ }
+
+ String msgTemplate = "%s = %s is invalid property";
+ Optional<String> optional = properties.keySet().stream().filter(
+ entity -> !PROPERTIES_SET.contains(entity)).findFirst();
+
+ if (optional.isPresent()) {
+ String msg = String.format(msgTemplate, optional.get(),
properties.get(optional.get()));
+ throw new AnalysisException(msg);
+ }
+
+ if (properties.containsKey(PROPERTY_SYNC)) {
+ try {
+ Boolean.valueOf(properties.get(PROPERTY_SYNC));
+ } catch (NumberFormatException e) {
+ String msg = String.format(msgTemplate, PROPERTY_SYNC,
properties.get(PROPERTY_SYNC));
+ throw new AnalysisException(msg);
+ }
+ }
+
+ if (properties.containsKey(PROPERTY_INCREMENTAL)) {
+ try {
+ Boolean.valueOf(properties.get(PROPERTY_INCREMENTAL));
+ } catch (NumberFormatException e) {
+ String msg = String.format(msgTemplate, PROPERTY_INCREMENTAL,
properties.get(PROPERTY_INCREMENTAL));
+ throw new AnalysisException(msg);
}
+ }
+
+ if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)
+ && properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+ throw new AnalysisException("only one sampling parameter can be
specified simultaneously");
+ }
+
+ if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
+ checkNumericProperty(PROPERTY_SAMPLE_PERCENT,
properties.get(PROPERTY_SAMPLE_PERCENT),
+ 0, 100, false, "should be > 0 and < 100");
+ }
- long taskTimeout = ((Long) Util.getLongPropertyOrDefault(
- properties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
- Config.max_cbo_statistics_task_timeout_sec,
DESIRED_TASK_TIMEOUT_SEC,
- CBO_STATISTICS_TASK_TIMEOUT_SEC + " should >
0")).intValue();
- properties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC,
String.valueOf(taskTimeout));
+ if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+ checkNumericProperty(PROPERTY_SAMPLE_ROWS,
properties.get(PROPERTY_SAMPLE_ROWS),
+ 0, Integer.MAX_VALUE, false, "needs at least 1 row");
+ }
+
+ if (properties.containsKey(PROPERTY_NUM_BUCKETS)) {
+ checkNumericProperty(PROPERTY_NUM_BUCKETS,
properties.get(PROPERTY_NUM_BUCKETS),
+ 1, Integer.MAX_VALUE, true, "needs at least 1 buckets");
+ }
+
+ if (properties.containsKey(PROPERTY_ANALYSIS_TYPE)) {
+ try {
+ AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE));
+ } catch (NumberFormatException e) {
+ String msg = String.format(msgTemplate,
PROPERTY_ANALYSIS_TYPE, properties.get(PROPERTY_ANALYSIS_TYPE));
+ throw new AnalysisException(msg);
+ }
+ }
+
+ if (properties.containsKey(PROPERTY_INCREMENTAL)
+ &&
AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) ==
AnalysisType.HISTOGRAM) {
+ throw new AnalysisException(PROPERTY_INCREMENTAL + " collection of
histograms is not supported");
+ }
+
+ if (properties.containsKey(PROPERTY_NUM_BUCKETS)
+ &&
AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) !=
AnalysisType.HISTOGRAM) {
+ throw new AnalysisException(PROPERTY_NUM_BUCKETS + " can only be
specified when collecting histograms");
+ }
+ }
+
+ private void checkNumericProperty(String key, String value, int
lowerBound, int upperBound,
+ boolean includeBoundary, String errorMsg) throws AnalysisException
{
+ if (!StringUtils.isNumeric(value)) {
+ String msg = String.format("%s = %s is an invalid property.", key,
value);
+ throw new AnalysisException(msg);
+ }
+ int intValue = Integer.parseInt(value);
+ boolean isOutOfBounds = (includeBoundary && (intValue < lowerBound ||
intValue > upperBound))
+ || (!includeBoundary && (intValue <= lowerBound || intValue >=
upperBound));
+ if (isOutOfBounds) {
+ throw new AnalysisException(key + " " + errorMsg);
}
}
@@ -198,8 +291,47 @@ public class AnalyzeStmt extends DdlStmt {
}
public Map<String, String> getProperties() {
- // TODO add default properties
- return properties != null ? properties : Maps.newHashMap();
+ return properties;
+ }
+
+ public boolean isSync() {
+ return Boolean.parseBoolean(properties.get(PROPERTY_SYNC));
+ }
+
+ public boolean isIncremental() {
+ return Boolean.parseBoolean(properties.get(PROPERTY_INCREMENTAL));
+ }
+
+ public int getSamplePercent() {
+ if (!properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
+ return 0;
+ }
+ return Integer.parseInt(properties.get(PROPERTY_SAMPLE_PERCENT));
+ }
+
+ public int getSampleRows() {
+ if (!properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
+ return 0;
+ }
+ return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS));
+ }
+
+ public int getNumBuckets() {
+ if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) {
+ return 0;
+ }
+ return Integer.parseInt(properties.get(PROPERTY_NUM_BUCKETS));
+ }
+
+ public AnalysisType getAnalysisType() {
+ return AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE));
+ }
+
+ public AnalysisMethod getAnalysisMethod() {
+ if (getSamplePercent() > 0 || getSampleRows() > 0) {
+ return AnalysisMethod.SAMPLE;
+ }
+ return AnalysisMethod.FULL;
}
@Override
@@ -207,27 +339,22 @@ public class AnalyzeStmt extends DdlStmt {
StringBuilder sb = new StringBuilder();
sb.append("ANALYZE");
- if (isIncrement) {
- sb.append(" ");
- sb.append("INCREMENTAL");
- }
-
if (tableName != null) {
sb.append(" ");
sb.append(tableName.toSql());
}
- if (isHistogram) {
- sb.append(" ");
- sb.append("UPDATE HISTOGRAM ON");
- sb.append(" ");
- sb.append(StringUtils.join(columnNames, ","));
- } else if (columnNames != null) {
+ if (columnNames != null) {
sb.append("(");
sb.append(StringUtils.join(columnNames, ","));
sb.append(")");
}
+ if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) {
+ sb.append(" ");
+ sb.append("UPDATE HISTOGRAM");
+ }
+
if (properties != null) {
sb.append(" ");
sb.append("PROPERTIES(");
@@ -239,8 +366,4 @@ public class AnalyzeStmt extends DdlStmt {
return sb.toString();
}
-
- public boolean isSync() {
- return sync;
- }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 433135d5c8..4f2c3abe9a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -90,19 +90,17 @@ public class AnalysisManager {
}
// Each analyze stmt corresponding to an analysis job.
- public void createAnalysisJob(AnalyzeStmt analyzeStmt) throws DdlException
{
- String catalogName = analyzeStmt.getCatalogName();
- String db = analyzeStmt.getDBName();
- TableName tbl = analyzeStmt.getTblName();
- StatisticsUtil.convertTableNameToObjects(tbl);
- Set<String> colNames = analyzeStmt.getColumnNames();
- Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();
+ public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException {
long jobId = Env.getCurrentEnv().getNextId();
- createTaskForEachColumns(analyzeStmt, catalogName, db, tbl, colNames,
analysisTaskInfos, jobId);
- createTaskForMVIdx(analyzeStmt, catalogName, db, tbl,
analysisTaskInfos, jobId);
- persistAnalysisJob(catalogName, db, tbl, jobId);
+ AnalysisTaskInfoBuilder taskInfoBuilder = buildCommonTaskInfo(stmt,
jobId);
+ Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();
+
+ // start build analysis tasks
+ createTaskForEachColumns(stmt.getColumnNames(), taskInfoBuilder,
analysisTaskInfos);
+ createTaskForMVIdx(stmt.getTable(), taskInfoBuilder,
analysisTaskInfos, stmt.getAnalysisType());
+ persistAnalysisJob(taskInfoBuilder);
- if (analyzeStmt.isSync()) {
+ if (stmt.isSync()) {
syncExecute(analysisTaskInfos.values());
return;
}
@@ -111,42 +109,79 @@ public class AnalysisManager {
analysisTaskInfos.values().forEach(taskScheduler::schedule);
}
- private void persistAnalysisJob(String catalogName, String db, TableName
tbl,
- long jobId) throws DdlException {
+ private AnalysisTaskInfoBuilder buildCommonTaskInfo(AnalyzeStmt stmt, long
jobId) {
+ AnalysisTaskInfoBuilder taskInfoBuilder = new
AnalysisTaskInfoBuilder();
+ String catalogName = stmt.getCatalogName();
+ String db = stmt.getDBName();
+ TableName tbl = stmt.getTblName();
+ StatisticsUtil.convertTableNameToObjects(tbl);
+ String tblName = tbl.getTbl();
+ int samplePercent = stmt.getSamplePercent();
+ int sampleRows = stmt.getSampleRows();
+ AnalysisType analysisType = stmt.getAnalysisType();
+ AnalysisMethod analysisMethod = stmt.getAnalysisMethod();
+
+ taskInfoBuilder.setJobId(jobId);
+ taskInfoBuilder.setCatalogName(catalogName);
+ taskInfoBuilder.setDbName(db);
+ taskInfoBuilder.setTblName(tblName);
+ taskInfoBuilder.setJobType(JobType.MANUAL);
+ taskInfoBuilder.setState(AnalysisState.PENDING);
+ taskInfoBuilder.setScheduleType(ScheduleType.ONCE);
+
+ if (analysisMethod == AnalysisMethod.SAMPLE) {
+ taskInfoBuilder.setAnalysisMethod(AnalysisMethod.SAMPLE);
+ taskInfoBuilder.setSamplePercent(samplePercent);
+ taskInfoBuilder.setSampleRows(sampleRows);
+ } else {
+ taskInfoBuilder.setAnalysisMethod(AnalysisMethod.FULL);
+ }
+
+ if (analysisType == AnalysisType.HISTOGRAM) {
+ taskInfoBuilder.setAnalysisType(AnalysisType.HISTOGRAM);
+ int numBuckets = stmt.getNumBuckets();
+ int maxBucketNum = numBuckets > 0 ? numBuckets
+ : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM;
+ taskInfoBuilder.setMaxBucketNum(maxBucketNum);
+ } else {
+ taskInfoBuilder.setAnalysisType(AnalysisType.COLUMN);
+ }
+
+ return taskInfoBuilder;
+ }
+
+ private void persistAnalysisJob(AnalysisTaskInfoBuilder taskInfoBuilder)
throws DdlException {
try {
- AnalysisTaskInfo analysisTaskInfo = new
AnalysisTaskInfoBuilder().setJobId(
- jobId).setTaskId(-1)
- .setCatalogName(catalogName).setDbName(db)
- .setTblName(tbl.getTbl())
- .setJobType(JobType.MANUAL)
-
.setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
- .setScheduleType(ScheduleType.ONCE).build();
+ AnalysisTaskInfoBuilder jobInfoBuilder = taskInfoBuilder.copy();
+ AnalysisTaskInfo analysisTaskInfo =
jobInfoBuilder.setTaskId(-1).build();
StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
} catch (Throwable t) {
throw new DdlException(t.getMessage(), t);
}
}
- private void createTaskForMVIdx(AnalyzeStmt analyzeStmt, String
catalogName, String db, TableName tbl,
- Map<Long, AnalysisTaskInfo> analysisTaskInfos, long jobId) throws
DdlException {
- if (!(analyzeStmt.isWholeTbl &&
analyzeStmt.getTable().getType().equals(TableType.OLAP))) {
+ private void createTaskForMVIdx(TableIf table, AnalysisTaskInfoBuilder
taskInfoBuilder,
+ Map<Long, AnalysisTaskInfo> analysisTaskInfos, AnalysisType
analysisType) throws DdlException {
+ TableType type = table.getType();
+ if (analysisType != AnalysisType.INDEX ||
!type.equals(TableType.OLAP)) {
+ // not need to collect statistics for materialized view
return;
}
- OlapTable olapTable = (OlapTable) analyzeStmt.getTable();
+
+ taskInfoBuilder.setAnalysisType(analysisType);
+ OlapTable olapTable = (OlapTable) table;
+
try {
olapTable.readLock();
for (MaterializedIndexMeta meta :
olapTable.getIndexIdToMeta().values()) {
if (meta.getDefineStmt() == null) {
continue;
}
+ AnalysisTaskInfoBuilder indexTaskInfoBuilder =
taskInfoBuilder.copy();
+ long indexId = meta.getIndexId();
long taskId = Env.getCurrentEnv().getNextId();
- AnalysisTaskInfo analysisTaskInfo = new
AnalysisTaskInfoBuilder().setJobId(
- jobId).setTaskId(taskId)
- .setCatalogName(catalogName).setDbName(db)
- .setTblName(tbl.getTbl())
-
.setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
-
.setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
- .setScheduleType(ScheduleType.ONCE).build();
+ AnalysisTaskInfo analysisTaskInfo =
indexTaskInfoBuilder.setIndexId(indexId)
+ .setTaskId(taskId).build();
try {
StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
} catch (Exception e) {
@@ -159,19 +194,14 @@ public class AnalysisManager {
}
}
- private void createTaskForEachColumns(AnalyzeStmt analyzeStmt, String
catalogName, String db, TableName tbl,
- Set<String> colNames, Map<Long, AnalysisTaskInfo>
analysisTaskInfos,
- long jobId) throws DdlException {
+ private void createTaskForEachColumns(Set<String> colNames,
AnalysisTaskInfoBuilder taskInfoBuilder,
+ Map<Long, AnalysisTaskInfo> analysisTaskInfos) throws DdlException
{
for (String colName : colNames) {
+ AnalysisTaskInfoBuilder colTaskInfoBuilder =
taskInfoBuilder.copy();
+ long indexId = -1;
long taskId = Env.getCurrentEnv().getNextId();
- AnalysisType analType = analyzeStmt.isHistogram ?
AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
- AnalysisTaskInfo analysisTaskInfo = new
AnalysisTaskInfoBuilder().setJobId(jobId)
-
.setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
- .setTblName(tbl.getTbl()).setColName(colName)
- .setJobType(JobType.MANUAL)
-
.setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
- .setState(AnalysisState.PENDING)
- .setScheduleType(ScheduleType.ONCE).build();
+ AnalysisTaskInfo analysisTaskInfo =
colTaskInfoBuilder.setColName(colName)
+ .setIndexId(indexId).setTaskId(taskId).build();
try {
StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
} catch (Exception e) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
index 004d51b55e..c0f04b5eb5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
@@ -70,9 +70,11 @@ public class AnalysisTaskInfo {
public final AnalysisType analysisType;
- // TODO: define constants or get them from configuration properties
- public final double sampleRate = 1.0;
- public final int maxBucketNum = 128;
+ public final int samplePercent;
+
+ public final int sampleRows;
+
+ public final int maxBucketNum;
public String message;
@@ -84,9 +86,9 @@ public class AnalysisTaskInfo {
public final ScheduleType scheduleType;
public AnalysisTaskInfo(long jobId, long taskId, String catalogName,
String dbName, String tblName,
- String colName, Long indexId, JobType jobType,
- AnalysisMethod analysisMethod, AnalysisType analysisType, String
message,
- int lastExecTimeInMs, AnalysisState state, ScheduleType
scheduleType) {
+ String colName, Long indexId, JobType jobType, AnalysisMethod
analysisMethod,
+ AnalysisType analysisType, int samplePercent, int sampleRows, int
maxBucketNum,
+ String message, int lastExecTimeInMs, AnalysisState state,
ScheduleType scheduleType) {
this.jobId = jobId;
this.taskId = taskId;
this.catalogName = catalogName;
@@ -97,6 +99,9 @@ public class AnalysisTaskInfo {
this.jobType = jobType;
this.analysisMethod = analysisMethod;
this.analysisType = analysisType;
+ this.samplePercent = samplePercent;
+ this.sampleRows = sampleRows;
+ this.maxBucketNum = maxBucketNum;
this.message = message;
this.lastExecTimeInMs = lastExecTimeInMs;
this.state = state;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
index e804388153..e03f4f8d63 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfoBuilder.java
@@ -33,6 +33,9 @@ public class AnalysisTaskInfoBuilder {
private JobType jobType;
private AnalysisMethod analysisMethod;
private AnalysisType analysisType;
+ private int maxBucketNum;
+ private int samplePercent;
+ private int sampleRows;
private String message;
private int lastExecTimeInMs;
private AnalysisState state;
@@ -88,6 +91,21 @@ public class AnalysisTaskInfoBuilder {
return this;
}
+ public AnalysisTaskInfoBuilder setMaxBucketNum(int maxBucketNum) {
+ this.maxBucketNum = maxBucketNum;
+ return this;
+ }
+
+ public AnalysisTaskInfoBuilder setSamplePercent(int samplePercent) {
+ this.samplePercent = samplePercent;
+ return this;
+ }
+
+ public AnalysisTaskInfoBuilder setSampleRows(int sampleRows) {
+ this.sampleRows = sampleRows;
+ return this;
+ }
+
public AnalysisTaskInfoBuilder setMessage(String message) {
this.message = message;
return this;
@@ -109,7 +127,29 @@ public class AnalysisTaskInfoBuilder {
}
public AnalysisTaskInfo build() {
- return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName,
tblName, colName,
- indexId, jobType, analysisMethod, analysisType, message,
lastExecTimeInMs, state, scheduleType);
+ return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName,
tblName,
+ colName, indexId, jobType, analysisMethod, analysisType,
samplePercent,
+ sampleRows, maxBucketNum, message, lastExecTimeInMs, state,
scheduleType);
+ }
+
+ public AnalysisTaskInfoBuilder copy() {
+ return new AnalysisTaskInfoBuilder()
+ .setJobId(jobId)
+ .setTaskId(taskId)
+ .setCatalogName(catalogName)
+ .setDbName(dbName)
+ .setTblName(tblName)
+ .setColName(colName)
+ .setIndexId(indexId)
+ .setJobType(jobType)
+ .setAnalysisMethod(analysisMethod)
+ .setAnalysisType(analysisType)
+ .setSamplePercent(samplePercent)
+ .setSampleRows(sampleRows)
+ .setMaxBucketNum(maxBucketNum)
+ .setMessage(message)
+ .setLastExecTimeInMs(lastExecTimeInMs)
+ .setState(state)
+ .setScheduleType(scheduleType);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 001d1339a6..53c268426d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -24,6 +24,7 @@ import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.qe.StmtExecutor;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import com.google.common.annotations.VisibleForTesting;
@@ -189,4 +190,15 @@ public abstract class BaseAnalysisTask {
return unsupportedType.contains(type);
}
+ protected String getSampleExpression() {
+ if (info.analysisMethod == AnalysisMethod.FULL) {
+ return "";
+ }
+ // TODO Add sampling methods for external tables
+ if (info.samplePercent > 0) {
+ return String.format("TABLESAMPLE(%d PERCENT)",
info.samplePercent);
+ } else {
+ return String.format("TABLESAMPLE(%d ROWS)", info.sampleRows);
+ }
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
index 5adb34d4b0..58f9ad01a7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
@@ -19,8 +19,7 @@ package org.apache.doris.statistics;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.FeConstants;
-import org.apache.doris.qe.AutoCloseConnectContext;
-import org.apache.doris.qe.StmtExecutor;
+import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
@@ -28,8 +27,6 @@ import org.apache.commons.text.StringSubstitutor;
import java.util.HashMap;
import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
/**
* Each task analyze one column.
@@ -49,10 +46,7 @@ public class HistogramTask extends BaseAnalysisTask {
+ " HISTOGRAM(`${colName}`, ${maxBucketNum}) AS buckets, "
+ " NOW() AS create_time "
+ "FROM "
- + " `${dbName}`.`${tblName}`";
-
- private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART =
ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE
- + " PARTITION (${partName})";
+ + " `${dbName}`.`${tblName}` ${sampleExpr}";
@VisibleForTesting
public HistogramTask() {
@@ -71,44 +65,29 @@ public class HistogramTask extends BaseAnalysisTask {
params.put("catalogId", String.valueOf(catalog.getId()));
params.put("dbId", String.valueOf(db.getId()));
params.put("tblId", String.valueOf(tbl.getId()));
- params.put("idxId", "-1");
+ params.put("idxId", String.valueOf(info.indexId));
params.put("colId", String.valueOf(info.colName));
params.put("dbName", info.dbName);
params.put("tblName", String.valueOf(info.tblName));
params.put("colName", String.valueOf(info.colName));
- params.put("sampleRate", String.valueOf(info.sampleRate));
+ params.put("sampleRate", getSampleRateFunction());
+ params.put("sampleExpr", getSampleExpression());
params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
- params.put("percentValue", String.valueOf((int) (info.sampleRate *
100)));
- String histogramSql;
- Set<String> partitionNames = tbl.getPartitionNames();
+ StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+
StatisticsUtil.execUpdate(stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE));
+
Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1,
col.getName());
+ }
- if (partitionNames.isEmpty()) {
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- histogramSql =
stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE);
- } else {
- try {
- tbl.readLock();
- String partNames = partitionNames.stream()
- .filter(x -> tbl.getPartition(x) != null)
- .map(partName -> "`" + partName + "`")
- .collect(Collectors.joining(","));
- params.put("partName", partNames);
- StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- histogramSql =
stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART);
- } finally {
- tbl.readUnlock();
- }
+ private String getSampleRateFunction() {
+ if (info.analysisMethod == AnalysisMethod.FULL) {
+ return "0";
}
-
- LOG.info("SQL to collect the histogram:\n {}", histogramSql);
-
- try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext())
{
- r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
- this.stmtExecutor = new StmtExecutor(r.connectContext,
histogramSql);
- this.stmtExecutor.execute();
+ if (info.samplePercent > 0) {
+ return String.valueOf(info.samplePercent / 100.0);
+ } else {
+ double sampRate = (double) info.sampleRows / tbl.getRowCount();
+ return sampRate >= 1 ? "1.0" : String.format("%.4f", sampRate);
}
-
-
Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1,
col.getName());
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
index 62eb72ec9e..a3bac1bbc8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java
@@ -50,11 +50,11 @@ import java.util.Map;
public class MVAnalysisTask extends BaseAnalysisTask {
private static final String ANALYZE_MV_PART = INSERT_PART_STATISTICS
- + " FROM (${sql}) mv";
+ + " FROM (${sql}) mv ${sampleExpr}";
private static final String ANALYZE_MV_COL = INSERT_COL_STATISTICS
+ " (SELECT NDV(`${colName}`) AS ndv "
- + " FROM (${sql}) mv) t2\n";
+ + " FROM (${sql}) mv) t2";
private MaterializedIndexMeta meta;
@@ -118,9 +118,11 @@ public class MVAnalysisTask extends BaseAnalysisTask {
params.put("colName", colName);
params.put("tblName", String.valueOf(info.tblName));
params.put("sql", sql);
+ params.put("sampleExpr", getSampleExpression());
StatisticsUtil.execUpdate(ANALYZE_MV_PART, params);
}
params.remove("partId");
+ params.remove("sampleExpr");
params.put("type", column.getType().toString());
StatisticsUtil.execUpdate(ANALYZE_MV_COL, params);
Env.getCurrentEnv().getStatisticsCache()
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index de848a2592..24e30e2126 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -40,7 +40,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
private static final String ANALYZE_PARTITION_SQL_TEMPLATE =
INSERT_PART_STATISTICS
+ "FROM `${dbName}`.`${tblName}` "
- + "PARTITION ${partName}";
+ + "PARTITION ${partName} ${sampleExpr}";
// TODO Currently, NDV is computed for the full table; in fact,
// NDV should only be computed for the relevant partition.
@@ -64,12 +64,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("catalogId", String.valueOf(catalog.getId()));
params.put("dbId", String.valueOf(db.getId()));
params.put("tblId", String.valueOf(tbl.getId()));
- params.put("idxId", "-1");
+ params.put("idxId", String.valueOf(info.indexId));
params.put("colId", String.valueOf(info.colName));
params.put("dataSizeFunction", getDataSizeFunction(col));
params.put("dbName", info.dbName);
params.put("colName", String.valueOf(info.colName));
params.put("tblName", String.valueOf(info.tblName));
+ params.put("sampleExpr", getSampleExpression());
List<String> partitionAnalysisSQLs = new ArrayList<>();
try {
tbl.readLock();
@@ -90,6 +91,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
}
execSQLs(partitionAnalysisSQLs);
params.remove("partId");
+ params.remove("sampleExpr");
params.put("type", col.getType().toString());
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index d5d473eac6..46bf364f7c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -68,4 +68,5 @@ public class StatisticConstants {
public static final int FETCH_LIMIT = 10000;
public static final int FETCH_INTERVAL_IN_MS = 500;
+ public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
}
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex
b/fe/fe-core/src/main/jflex/sql_scanner.flex
index ec6511411c..62a237e3b3 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -418,6 +418,7 @@ import org.apache.doris.qe.SqlModeHelper;
keywordMap.put("soname", new Integer(SqlParserSymbols.KW_SONAME));
keywordMap.put("split", new Integer(SqlParserSymbols.KW_SPLIT));
keywordMap.put("sql_block_rule", new
Integer(SqlParserSymbols.KW_SQL_BLOCK_RULE));
+ keywordMap.put("sample", new Integer(SqlParserSymbols.KW_SAMPLE));
keywordMap.put("start", new Integer(SqlParserSymbols.KW_START));
keywordMap.put("stats", new Integer(SqlParserSymbols.KW_STATS));
keywordMap.put("status", new Integer(SqlParserSymbols.KW_STATUS));
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
index caf316429f..78d4e829ab 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java
@@ -75,7 +75,7 @@ public class HistogramTaskTest extends TestWithFeService {
AnalysisManager analysisManager =
Env.getCurrentEnv().getAnalysisManager();
StmtExecutor executor = getSqlStmtExecutor(
- "ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION
(p_201701)");
+ "ANALYZE TABLE t1(col1) UPDATE HISTOGRAM");
Assertions.assertNotNull(executor);
ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> taskMap =
diff --git a/regression-test/data/statistics/show_stats_test.out
b/regression-test/data/statistics/show_stats_test.out
new file mode 100644
index 0000000000..577d4e5090
--- /dev/null
+++ b/regression-test/data/statistics/show_stats_test.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+city 6.0 4.0 0.0 42.0 6.0 '上海' '深圳'
+
+-- !sql --
+city VARCHAR(20) 0.0 4
[{"lower_expr":"上海","upper_expr":"上海","count":1.0,"pre_sum":0.0,"ndv":1.0},{"lower_expr":"北京","upper_expr":"北京","count":2.0,"pre_sum":1.0,"ndv":1.0},{"lower_expr":"广州","upper_expr":"广州","count":1.0,"pre_sum":3.0,"ndv":1.0},{"lower_expr":"深圳","upper_expr":"深圳","count":2.0,"pre_sum":4.0,"ndv":1.0}]
+
diff --git a/regression-test/suites/statistics/alter_col_stats.groovy
b/regression-test/suites/statistics/alter_col_stats.groovy
index 958e3341d4..3babc9ebbb 100644
--- a/regression-test/suites/statistics/alter_col_stats.groovy
+++ b/regression-test/suites/statistics/alter_col_stats.groovy
@@ -34,7 +34,7 @@ suite("alter_column_stats") {
sql """INSERT INTO statistics_test VALUES(3, 'c', '2013-01-01')"""
sql """ANALYZE TABLE statistics_test"""
- sql """ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2"""
+ sql """ANALYZE TABLE statistics_test(col1, col2) UPDATE HISTOGRAM"""
sleep(9000)
diff --git a/regression-test/suites/statistics/analyze_test.groovy
b/regression-test/suites/statistics/analyze_test.groovy
index f8414d9f68..d348f78ba3 100644
--- a/regression-test/suites/statistics/analyze_test.groovy
+++ b/regression-test/suites/statistics/analyze_test.groovy
@@ -29,6 +29,10 @@ suite("analyze_test") {
def tblName3 = "${dbName3}.analyze_test_tbl_3"
+ def dbName4 = "analyze_test_db_4"
+
+ def tblName4 = "${dbName4}.analyze_test_tbl_4"
+
sql """
DROP DATABASE IF EXISTS ${dbName1};
@@ -55,6 +59,13 @@ suite("analyze_test") {
CREATE DATABASE ${dbName3};
"""
+ sql """
+ DROP DATABASE IF EXISTS ${dbName4}
+ """
+
+ sql """
+ CREATE DATABASE ${dbName4};
+ """
sql """
DROP TABLE IF EXISTS ${tblName1}
@@ -95,6 +106,18 @@ suite("analyze_test") {
"enable_unique_key_merge_on_write"="true"
);"""
+ sql """
+ DROP TABLE IF EXISTS ${tblName4}
+ """
+
+ sql """CREATE TABLE ${tblName4} (analyze_test_col1 varchar(11451) not
null, analyze_test_col2 int not null, analyze_test_col3 int not null)
+ UNIQUE KEY(analyze_test_col1)
+ DISTRIBUTED BY HASH(analyze_test_col1)
+ BUCKETS 3
+ PROPERTIES(
+ "replication_num"="1",
+ "enable_unique_key_merge_on_write"="true"
+ );"""
sql """insert into ${tblName1} values(1, 2, 3);"""
sql """insert into ${tblName1} values(4, 5, 6);"""
@@ -114,6 +137,12 @@ suite("analyze_test") {
sql """insert into ${tblName3} values(3, 8, 2);"""
sql """insert into ${tblName3} values(5, 2, 1);"""
+ sql """insert into ${tblName4} values(1, 2, 3);"""
+ sql """insert into ${tblName4} values(4, 5, 6);"""
+ sql """insert into ${tblName4} values(7, 1, 9);"""
+ sql """insert into ${tblName4} values(3, 8, 2);"""
+ sql """insert into ${tblName4} values(5, 2, 1);"""
+
sql """
delete from __internal_schema.column_statistics where col_id in
('analyze_test_col1', 'analyze_test_col2', 'analyze_test_col3')
"""
@@ -142,6 +171,22 @@ suite("analyze_test") {
ALTER TABLE ${tblName3} DROP COLUMN analyze_test_col2;
"""
+ sql """
+ analyze sync table ${tblName4} with sample rows 5;
+ """
+
+ sql """
+ analyze table ${tblName4} with sync with incremental with sample
percent 20;
+ """
+
+ sql """
+ analyze sync table ${tblName4} update histogram with sample percent 20
with buckets 2;
+ """
+
+ sql """
+ analyze table ${tblName4} update histogram with sync with sample rows
20 with buckets 2;
+ """
+
sql """
DROP TABLE ${tblName2}
"""
@@ -150,6 +195,10 @@ suite("analyze_test") {
DROP DATABASE ${dbName1}
"""
+ sql """
+ DROP DATABASE ${dbName4}
+ """
+
sql """
DROP EXPIRED STATS
"""
diff --git a/regression-test/suites/statistics/show_stats_test.groovy
b/regression-test/suites/statistics/show_stats_test.groovy
new file mode 100644
index 0000000000..fb65741c21
--- /dev/null
+++ b/regression-test/suites/statistics/show_stats_test.groovy
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_show_stats") {
+ def dbName = "stats_test"
+ def tblName = "${dbName}.example_tbl"
+
+ sql "DROP DATABASE IF EXISTS ${dbName}"
+
+ sql "CREATE DATABASE IF NOT EXISTS ${dbName};"
+
+ sql "DROP TABLE IF EXISTS ${tblName}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tblName} (
+ `user_id` LARGEINT NOT NULL,
+ `date` DATE NOT NULL,
+ `city` VARCHAR(20),
+ `age` SMALLINT,
+ `sex` TINYINT,
+ `last_visit_date` DATETIME REPLACE,
+ `cost` BIGINT SUM,
+ `max_dwell_time` INT MAX,
+ `min_dwell_time` INT MIN
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
+ PARTITION BY LIST(`date`)
+ (
+ PARTITION `p_201701` VALUES IN ("2017-10-01"),
+ PARTITION `p_201702` VALUES IN ("2017-10-02"),
+ PARTITION `p_201703` VALUES IN ("2017-10-03"),
+ PARTITION `default`
+ )
+ DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+
+ sql """
+ INSERT INTO ${tblName} (`user_id`, `date`, `city`, `age`,
+ `sex`, `last_visit_date`, `cost`,
+ `max_dwell_time`, `min_dwell_time`)
+ VALUES (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 07:00:00", 15,
2, 2),
+ (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 06:00:00", 20, 10,
10),
+ (10001, "2017-10-01", "北京", 30, 1, "2017-10-01 17:05:45", 2, 22,
22),
+ (10002, "2017-10-02", "上海", 20, 1, "2017-10-02 12:59:12", 200, 5,
5),
+ (10003, "2017-10-02", "广州", 32, 0, "2017-10-02 11:20:00", 30, 11,
11),
+ (10004, "2017-10-01", "深圳", 35, 0, "2017-10-01 10:00:15", 100, 3,
3),
+ (10004, "2017-10-03", "深圳", 35, 0, "2017-10-03 10:20:22", 11, 6,
6);
+ """
+
+ sql "ANALYZE sync TABLE ${tblName};"
+
+ sql "ANALYZE sync TABLE ${tblName} UPDATE HISTOGRAM;"
+
+ qt_sql "SHOW COLUMN STATS ${tblName}(city);"
+
+ qt_sql "SHOW COLUMN HISTOGRAM ${tblName}(city);"
+
+ sql "DROP DATABASE IF EXISTS ${dbName}"
+
+ sql "CREATE DATABASE IF NOT EXISTS ${dbName};"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]