This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 2b82bec4066 [fix](statistics)Add row count to AnalysisJob and update
tableStats using this row count after analyze. (#38428)
2b82bec4066 is described below
commit 2b82bec4066d5950911a2237939b4acb6fd2f593
Author: Jibing-Li <[email protected]>
AuthorDate: Sat Jul 27 11:32:29 2024 +0800
[fix](statistics)Add row count to AnalysisJob and update tableStats using
this row count after analyze. (#38428)
Add row count to AnalysisJob and update tableStats using this row count
after analyze.
Take row count into consideration in the stats health calculation.
---
.../java/org/apache/doris/catalog/OlapTable.java | 27 ++++++++-
.../org/apache/doris/statistics/AnalysisInfo.java | 7 ++-
.../doris/statistics/AnalysisInfoBuilder.java | 9 ++-
.../apache/doris/statistics/AnalysisManager.java | 2 +
.../apache/doris/statistics/OlapAnalysisTask.java | 12 +++-
.../doris/statistics/StatisticsAutoCollector.java | 2 +
.../apache/doris/statistics/TableStatsMeta.java | 2 +-
.../doris/statistics/AnalysisManagerTest.java | 70 ++++++++++------------
.../doris/statistics/TableStatsMetaTest.java | 10 +---
.../suites/statistics/analyze_stats.groovy | 28 +++++++++
10 files changed, 113 insertions(+), 56 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 9a66cbc68ae..b0169571dfc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1300,12 +1300,33 @@ public class OlapTable extends Table implements
MTMVRelatedTableIf {
.collect(Collectors.toSet())))) {
return true;
}
- long rowCount = getRowCount();
- if (rowCount > 0 && tblStats.rowCount == 0) {
+
+ // 1 Check row count.
+ long currentRowCount = getRowCount();
+ long lastAnalyzeRowCount = tblStats.rowCount;
+ // 1.1 Empty table -> non-empty table. Need to analyze.
+ if (currentRowCount != 0 && lastAnalyzeRowCount == 0) {
+ return true;
+ }
+ // 1.2 Non-empty table -> empty table. Need to analyze.
+ if (currentRowCount == 0 && lastAnalyzeRowCount != 0) {
+ return true;
+ }
+ // 1.3 Table is still empty. No need to analyze. lastAnalyzeRowCount == 0 is always true here.
+ if (currentRowCount == 0) {
+ return false;
+ }
+ // 1.4 If the row count changed by more than the threshold, need to analyze.
+ // lastAnalyzeRowCount == 0 is always false here.
+ double changeRate =
+ ((double) Math.abs(currentRowCount - lastAnalyzeRowCount) /
lastAnalyzeRowCount) * 100.0;
+ if (changeRate > (100 -
StatisticsUtil.getTableStatsHealthThreshold())) {
return true;
}
+
+ // 2. Check update rows.
long updateRows = tblStats.updatedRows.get();
- int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
+ int tblHealth = StatisticsUtil.getTableHealth(currentRowCount,
updateRows);
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index c167db2228d..ab7f9935c72 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -190,6 +190,9 @@ public class AnalysisInfo implements Writable {
@SerializedName("emptyJob")
public final boolean emptyJob;
+
+ @SerializedName("rowCount")
+ public final long rowCount;
/**
*
* Used to store the newest partition version of tbl when creating this
job.
@@ -206,7 +209,8 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state,
ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean
samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression
cronExpression, boolean forceFull,
- boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean
emptyJob, boolean userInject) {
+ boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean
emptyJob, boolean userInject,
+ long rowCount) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@@ -244,6 +248,7 @@ public class AnalysisInfo implements Writable {
this.tblUpdateTime = tblUpdateTime;
this.emptyJob = emptyJob;
this.userInject = userInject;
+ this.rowCount = rowCount;
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 00cf9f7b1bc..6541027538a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -64,6 +64,7 @@ public class AnalysisInfoBuilder {
private long tblUpdateTime;
private boolean emptyJob;
private boolean userInject;
+ private long rowCount;
public AnalysisInfoBuilder() {
}
@@ -103,6 +104,7 @@ public class AnalysisInfoBuilder {
tblUpdateTime = info.tblUpdateTime;
emptyJob = info.emptyJob;
userInject = info.userInject;
+ rowCount = info.rowCount;
}
public AnalysisInfoBuilder setJobId(long jobId) {
@@ -275,12 +277,17 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setRowCount(long rowCount) {
+ this.rowCount = rowCount;
+ return this;
+ }
+
public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId,
tblId, jobColumns, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod,
analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message,
lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition,
isAllPartition, partitionCount,
- cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime, emptyJob, userInject);
+ cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime, emptyJob, userInject, rowCount);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 06f6ca331b3..71f5ce0fa87 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -354,6 +354,8 @@ public class AnalysisManager implements Writable {
infoBuilder.setTblUpdateTime(table.getUpdateTime());
infoBuilder.setEmptyJob(table instanceof OlapTable &&
table.getRowCount() == 0
&& analysisMethod.equals(AnalysisMethod.SAMPLE));
+ long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0
: table.getRowCount();
+ infoBuilder.setRowCount(rowCount);
return infoBuilder.build();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index dffba735fe9..c9b5edaff22 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -19,6 +19,7 @@ package org.apache.doris.statistics;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.MaterializedIndexMeta;
@@ -37,7 +38,6 @@ import org.apache.commons.text.StringSubstitutor;
import java.security.SecureRandom;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -69,10 +69,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
return;
}
List<Pair<String, String>> columnList = info.jobColumns;
- if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) ||
columnList == null || columnList.isEmpty()) {
+ if (columnList == null || columnList.isEmpty()) {
+ LOG.warn("Table {}.{}.{}, jobColumns is null or empty.",
info.catalogId, info.dbId, info.tblId);
+ throw new RuntimeException();
+ }
+ if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod)) {
StatsId statsId = new StatsId(concatColumnStatsId(),
info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
- job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
+ ColStatsData colStatsData = new ColStatsData(statsId);
+
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
+ job.appendBuf(this, Collections.singletonList(colStatsData));
return;
}
if (tableSample != null) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 9ca971845b7..4408a0d9255 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -181,6 +181,7 @@ public class StatisticsAutoCollector extends
StatisticsCollector {
List<AnalysisInfo> analysisInfos, TableIf table) {
AnalysisMethod analysisMethod = table.getDataSize(true) >=
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
+ long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0
: table.getRowCount();
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())
.setCatalogId(db.getCatalog().getId())
@@ -200,6 +201,7 @@ public class StatisticsAutoCollector extends
StatisticsCollector {
.setTblUpdateTime(table.getUpdateTime())
.setEmptyJob(table instanceof OlapTable && table.getRowCount()
== 0
&& analysisMethod.equals(AnalysisMethod.SAMPLE))
+ .setRowCount(rowCount)
.build();
analysisInfos.add(jobInfo);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 3b9b1e2bead..a9a580c8b40 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -141,7 +141,7 @@ public class TableStatsMeta implements Writable {
jobType = analyzedJob.jobType;
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
- rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
+ rowCount = analyzedJob.rowCount;
}
if (analyzedJob.emptyJob) {
return;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
index 674456b0b46..8a803bd2a30 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -34,12 +34,12 @@ import
org.apache.doris.statistics.AnalysisInfo.ScheduleType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
import mockit.Expectations;
import mockit.Injectable;
import mockit.Mock;
import mockit.MockUp;
import mockit.Mocked;
+import org.apache.hadoop.util.Lists;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -48,7 +48,6 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
// CHECKSTYLE OFF
public class AnalysisManagerTest {
@@ -265,63 +264,58 @@ public class AnalysisManagerTest {
public void testReAnalyze() {
new MockUp<OlapTable>() {
+ int count = 0;
+ int[] rowCount = new int[]{100, 200, 1, 0, 0, 100};
+
final Column c = new Column("col1", PrimitiveType.INT);
@Mock
- public List<Column> getBaseSchema() {
- return Lists.newArrayList(c);
+ public long getRowCount() {
+ return rowCount[count++];
}
@Mock
- public List<Column> getColumns() { return Lists.newArrayList(c); }
-
- @Mock
- public List<Pair<String, String>> getColumnIndexPairs(Set<String>
columns) {
- List<Pair<String, String>> jobList = Lists.newArrayList();
- jobList.add(Pair.of("1", "1"));
- jobList.add(Pair.of("2", "2"));
- jobList.add(Pair.of("3", "3"));
- return jobList;
+ public List<Column> getBaseSchema() {
+ return org.apache.hadoop.util.Lists.newArrayList(c);
}
- };
- OlapTable olapTable = new OlapTable();
- List<Pair<String, String>> jobList = Lists.newArrayList();
- jobList.add(Pair.of("1", "1"));
- jobList.add(Pair.of("2", "2"));
- TableStatsMeta stats0 = new TableStatsMeta(
- 0, new AnalysisInfoBuilder().setJobColumns(jobList)
- .setColName("col1").build(), olapTable);
- Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0));
- new MockUp<OlapTable>() {
- int count = 0;
- int[] rowCount = new int[]{100, 100, 200, 200, 1, 1};
-
- @Mock
- public long getRowCount() {
- return rowCount[count++];
- }
@Mock
- public List<Pair<String, String>> getColumnIndexPairs(Set<String>
columns) {
- List<Pair<String, String>> jobList = Lists.newArrayList();
- return jobList;
+ public List<Column> getColumns() {
+ return Lists.newArrayList(c);
}
+
};
+ OlapTable olapTable = new OlapTable();
TableStatsMeta stats1 = new TableStatsMeta(
50, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>())
- .setColName("col1").build(), olapTable);
- stats1.updatedRows.addAndGet(50);
+ .setColName("col1").setRowCount(100).build(), olapTable);
+ stats1.updatedRows.addAndGet(70);
Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1));
TableStatsMeta stats2 = new TableStatsMeta(
- 190, new AnalysisInfoBuilder()
- .setJobColumns(new ArrayList<>()).setColName("col1").build(),
olapTable);
+ 190, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>())
+ .setColName("col1").setRowCount(200).build(), olapTable);
stats2.updatedRows.addAndGet(20);
Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2));
TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder()
- .setJobColumns(new
ArrayList<>()).setEmptyJob(true).setColName("col1").build(), olapTable);
+ .setEmptyJob(true).setColName("col1").setJobColumns(new
ArrayList<>())
+ .setRowCount(0).build(), olapTable);
Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3));
+ TableStatsMeta stats4 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+ .setEmptyJob(true).setColName("col1").setJobColumns(new
ArrayList<>())
+ .setRowCount(1).build(), olapTable);
+ Assertions.assertTrue(olapTable.needReAnalyzeTable(stats4));
+
+ TableStatsMeta stats5 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+ .setEmptyJob(true).setColName("col1").setJobColumns(new
ArrayList<>())
+ .setRowCount(0).build(), olapTable);
+ Assertions.assertFalse(olapTable.needReAnalyzeTable(stats5));
+
+ TableStatsMeta stats6 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+ .setEmptyJob(true).setColName("col1").setJobColumns(new
ArrayList<>())
+ .setRowCount(30).build(), olapTable);
+ Assertions.assertTrue(olapTable.needReAnalyzeTable(stats6));
}
@Test
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
index 94eab9e00cc..349f415a3c4 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
@@ -19,8 +19,6 @@ package org.apache.doris.statistics;
import org.apache.doris.catalog.OlapTable;
-import mockit.Mock;
-import mockit.MockUp;
import mockit.Mocked;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -31,15 +29,9 @@ class TableStatsMetaTest {
@Test
void update(@Mocked OlapTable table) {
- new MockUp<OlapTable>() {
- @Mock
- public long getRowCount() {
- return 4;
- }
- };
TableStatsMeta tableStatsMeta = new TableStatsMeta();
AnalysisInfo jobInfo = new AnalysisInfoBuilder().setJobColumns(new
ArrayList<>())
- .setColName("col1").build();
+ .setColName("col1").setRowCount(4).build();
tableStatsMeta.update(jobInfo, table);
Assertions.assertEquals(4, tableStatsMeta.rowCount);
}
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index db56b400ea9..5a6e753e0a3 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -2845,6 +2845,34 @@ PARTITION `p599` VALUES IN (599)
assertEquals("521779.0", alter_result[0][5])
assertEquals("7.142863009760572", alter_result[0][6])
+ // Test analyze after new empty partition created.
+ sql """CREATE TABLE `part` (
+ `id` INT NULL,
+ `colint` INT NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ PARTITION BY RANGE(`id`)
+ (PARTITION p1 VALUES [("-2147483648"), ("10000")),
+ PARTITION p2 VALUES [("10000"), ("20000")))
+ DISTRIBUTED BY HASH(`id`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """analyze table part with sync;"""
+ sql """Insert into part values (1, 1), (10001, 10001);"""
+ sql """analyze table part with sync;"""
+ sleep(1000)
+ sql """alter table part add partition p3 VALUES [("20000"), ("30000"));"""
+ sql """analyze table part with sync;"""
+ sql """analyze table part with sync;"""
+ def new_part_result = sql """show column stats part(id)"""
+ assertEquals("2.0", new_part_result[0][2])
+ new_part_result = sql """show column stats part(colint)"""
+ assertEquals("2.0", new_part_result[0][2])
+
sql """DROP DATABASE IF EXISTS trigger"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org