This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f4c5ce260b4 [fix](statistics)Fix rowCount==0 while analyzing bug
(#28969)
f4c5ce260b4 is described below
commit f4c5ce260b4ac4b3974bcd8ef7dcd059ecfdd78b
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Dec 27 23:04:37 2023 +0800
[fix](statistics)Fix rowCount==0 while analyzing bug (#28969)
Sample analysis needs to get the row count by calling table.getRowCount(). This
method is not updated in real time, so it may still report 0 rows for a table
that already contains data, which may cause the sample task to scan the whole
table.
This PR fixes this by setting a flag that indicates the analyze job is for
an empty table and skipping the table scan in that case. Meanwhile, updatedRows
is not reset in this case.
Set hugeTableAutoAnalyzeIntervalInMillis = 0 because the default huge table
size threshold has already been set to 0.
---
docs/en/docs/query-acceleration/statistics.md | 4 ++--
docs/zh-CN/docs/query-acceleration/statistics.md | 4 ++--
.../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +-
.../main/java/org/apache/doris/statistics/AnalysisInfo.java | 7 ++++++-
.../java/org/apache/doris/statistics/AnalysisInfoBuilder.java | 10 ++++++++--
.../main/java/org/apache/doris/statistics/AnalysisManager.java | 1 +
.../java/org/apache/doris/statistics/OlapAnalysisTask.java | 3 ++-
.../java/org/apache/doris/statistics/StatisticConstants.java | 2 +-
.../org/apache/doris/statistics/StatisticsAutoCollector.java | 1 +
.../main/java/org/apache/doris/statistics/TableStatsMeta.java | 2 +-
.../apache/doris/statistics/StatisticsAutoCollectorTest.java | 2 +-
regression-test/suites/statistics/analyze_stats.groovy | 2 +-
12 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/docs/en/docs/query-acceleration/statistics.md
b/docs/en/docs/query-acceleration/statistics.md
index c7a58277580..4cb0891172d 100644
--- a/docs/en/docs/query-acceleration/statistics.md
+++ b/docs/en/docs/query-acceleration/statistics.md
@@ -295,8 +295,8 @@ mysql> KILL ANALYZE 52357;
|auto_analyze_end_time|End time for automatic statistics collection|23:59:59|
|enable_auto_analyze|Enable automatic collection functionality|true|
|huge_table_default_sample_rows|Sampling rows for large tables|4194304|
-|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value
will be automatically sampled during collection of statistics|5368709120|
-|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval
for automatic ANALYZE on large tables. Tables with sizes greater than
`huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within
this time interval.|43200000|
+|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value
will be automatically sampled during collection of statistics|0|
+|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval
for automatic ANALYZE on large tables. Tables with sizes greater than
`huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within
this time interval.|0|
|table_stats_health_threshold|Ranges from 0 to 100. If data updates since the
last statistics collection exceed `(100 - table_stats_health_threshold)%`, the
table's statistics are considered outdated.|60|
|analyze_timeout|Controls the timeout for synchronous ANALYZE in seconds|43200|
|auto_analyze_table_width_threshold|Controls the maximum width of table that
will be auto analyzed. Table with more columns than this value will not be auto
analyzed.|70|
diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md
b/docs/zh-CN/docs/query-acceleration/statistics.md
index 20b535e357b..bff100fa98a 100644
--- a/docs/zh-CN/docs/query-acceleration/statistics.md
+++ b/docs/zh-CN/docs/query-acceleration/statistics.md
@@ -299,8 +299,8 @@ mysql> KILL ANALYZE 52357;
|auto_analyze_end_time|自动统计信息收集结束时间|23:59:59|
|enable_auto_analyze|开启自动收集功能|true|
|huge_table_default_sample_rows|对大表的采样行数|4194304|
-|huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|5368709120|
-|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes
* 5的表仅ANALYZE一次|43200000|
+|huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|0|
+|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes
* 5的表仅ANALYZE一次|0|
|table_stats_health_threshold|取值在0-100之间,当自上次统计信息收集操作之后,数据更新量达到 (100 -
table_stats_health_threshold)% ,认为该表的统计信息已过时|60|
|analyze_timeout|控制ANALYZE超时时间,单位为秒|43200|
|auto_analyze_table_width_threshold|控制自动统计信息收集处理的最大表宽度,列数大于该值的表不会参与自动统计信息收集|70|
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 35545059901..c1ea2f29ff2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1450,7 +1450,7 @@ public class SessionVariable implements Serializable,
Writable {
"This controls the minimum time interval for automatic
ANALYZE on large tables."
+ "Within this interval,"
+ "tables larger than
huge_table_lower_bound_size_in_bytes are analyzed only once."})
- public long hugeTableAutoAnalyzeIntervalInMillis =
TimeUnit.HOURS.toMillis(12);
+ public long hugeTableAutoAnalyzeIntervalInMillis =
TimeUnit.HOURS.toMillis(0);
@VariableMgr.VarAttr(name =
EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
description = {"控制对外表的自动ANALYZE的最小时间间隔,在该时间间隔内的外表仅ANALYZE一次",
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 65bb4a5dd95..aaff9e59927 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -188,6 +188,9 @@ public class AnalysisInfo implements Writable {
@SerializedName("endTime")
public long endTime;
+
+ @SerializedName("emptyJob")
+ public final boolean emptyJob;
/**
*
* Used to store the newest partition version of tbl when creating this
job.
@@ -202,7 +205,7 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state,
ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean
samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression
cronExpression, boolean forceFull,
- boolean usingSqlForPartitionColumn, long tblUpdateTime) {
+ boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean
emptyJob) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@@ -238,6 +241,7 @@ public class AnalysisInfo implements Writable {
this.forceFull = forceFull;
this.usingSqlForPartitionColumn = usingSqlForPartitionColumn;
this.tblUpdateTime = tblUpdateTime;
+ this.emptyJob = emptyJob;
}
@Override
@@ -279,6 +283,7 @@ public class AnalysisInfo implements Writable {
}
sj.add("forceFull: " + forceFull);
sj.add("usingSqlForPartitionColumn: " + usingSqlForPartitionColumn);
+ sj.add("emptyJob: " + emptyJob);
return sj.toString();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 204aba6d0f8..310b7816ecd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -61,8 +61,8 @@ public class AnalysisInfoBuilder {
private CronExpression cronExpression;
private boolean forceFull;
private boolean usingSqlForPartitionColumn;
-
private long tblUpdateTime;
+ private boolean emptyJob;
public AnalysisInfoBuilder() {
}
@@ -100,6 +100,7 @@ public class AnalysisInfoBuilder {
forceFull = info.forceFull;
usingSqlForPartitionColumn = info.usingSqlForPartitionColumn;
tblUpdateTime = info.tblUpdateTime;
+ emptyJob = info.emptyJob;
}
public AnalysisInfoBuilder setJobId(long jobId) {
@@ -262,12 +263,17 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setEmptyJob(boolean emptyJob) {
+ this.emptyJob = emptyJob;
+ return this;
+ }
+
public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId,
tblId, colToPartitions, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod,
analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message,
lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition,
isAllPartition, partitionCount,
- cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime);
+ cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime, emptyJob);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 0bf24e0c288..39ae191d45a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -519,6 +519,7 @@ public class AnalysisManager implements Writable {
infoBuilder.setColToPartitions(colToPartitions);
infoBuilder.setTaskIds(Lists.newArrayList());
infoBuilder.setTblUpdateTime(table.getUpdateTime());
+ infoBuilder.setEmptyJob(table instanceof OlapTable &&
table.getRowCount() == 0);
return infoBuilder.build();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index e062e4eef85..81348c1f948 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -61,7 +61,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
- if (partitionNames == null || partitionNames.isEmpty()) {
+ if ((info.emptyJob &&
info.analysisMethod.equals(AnalysisInfo.AnalysisMethod.SAMPLE))
+ || partitionNames == null || partitionNames.isEmpty()) {
if (partitionNames == null) {
LOG.warn("Table {}.{}.{}, partitionNames for column {} is
null. ColToPartitions:[{}]",
info.catalogId, info.dbId, info.tblId, info.colName,
info.colToPartitions);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 3d6d2fe52aa..857a50e234c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -88,7 +88,7 @@ public class StatisticConstants {
public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
- public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS =
TimeUnit.HOURS.toMillis(12);
+ public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS =
TimeUnit.HOURS.toMillis(0);
public static final long EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS =
TimeUnit.HOURS.toMillis(24);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index ee50471175d..f799da56206 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -170,6 +170,7 @@ public class StatisticsAutoCollector extends
StatisticsCollector {
.setLastExecTimeInMs(System.currentTimeMillis())
.setJobType(JobType.SYSTEM)
.setTblUpdateTime(table.getUpdateTime())
+ .setEmptyJob(table instanceof OlapTable && table.getRowCount()
== 0)
.build();
analysisInfos.add(jobInfo);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index f500ab09f0b..eb6672ffe18 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -149,7 +149,7 @@ public class TableStatsMeta implements Writable {
if (tableIf instanceof OlapTable) {
rowCount = tableIf.getRowCount();
}
- if (analyzedJob.colToPartitions.keySet()
+ if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream()
.filter(c ->
!StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 0b4b2203d0d..87342202fb2 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -299,7 +299,7 @@ public class StatisticsAutoCollectorTest {
// A very huge table has been updated recently, so we should skip it
this time
stats.updatedTime = System.currentTimeMillis() - 1000;
StatisticsAutoCollector autoCollector = new StatisticsAutoCollector();
- Assertions.assertTrue(autoCollector.skip(olapTable));
+ Assertions.assertFalse(autoCollector.skip(olapTable));
// The update of this huge table is long time ago, so we shouldn't
skip it this time
stats.updatedTime = System.currentTimeMillis()
- StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis() -
10000;
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index e7e89f858fb..64967280ce9 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -1168,7 +1168,7 @@ PARTITION `p599` VALUES IN (599)
sql """ INSERT INTO test_updated_rows SELECT * FROM test_updated_rows """
sql """ANALYZE TABLE test_updated_rows WITH SYNC"""
def cnt2 = sql """ SHOW TABLE STATS test_updated_rows """
- assertEquals(Integer.valueOf(cnt2[0][0]), 0)
+ assertTrue(Integer.valueOf(cnt2[0][0]) == 0 || Integer.valueOf(cnt2[0][0])
== 8)
// test analyze specific column
sql """CREATE TABLE test_analyze_specific_column (col1 varchar(11451) not
null, col2 int not null, col3 int not null)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]