This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5505fa3755f [Fix](statistics) Fix partition name NPE and sample for
all table during auto analyze (#28916)
5505fa3755f is described below
commit 5505fa3755fdc7aa2e88899943ad8f614dfbdf80
Author: Jibing-Li <[email protected]>
AuthorDate: Sun Dec 24 01:49:49 2023 +0800
[Fix](statistics) Fix partition name NPE and sample for all table during
auto analyze (#28916)
Fix a partition name NPE and use sampling for all tables during auto analyze.
All tables are sampled because the table data size (getDataSize) may be reported
with latency, which could trigger a full analyze of a huge table and consume too
many resources. Sampling every table avoids this. The strategy will be improved later.
---
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +-
.../src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java | 6 +++++-
.../main/java/org/apache/doris/statistics/StatisticConstants.java | 2 +-
.../java/org/apache/doris/statistics/StatisticsAutoCollector.java | 2 +-
.../test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java | 2 +-
.../test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java | 2 +-
.../org/apache/doris/statistics/StatisticsAutoCollectorTest.java | 2 +-
7 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 7d7fdb1404e..b9658bf34a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1422,7 +1422,7 @@ public class SessionVariable implements Serializable,
Writable {
+ "When enable_auto_sample is enabled, tables"
+ "larger than this value will automatically
collect "
+ "statistics through sampling"})
- public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;
+ public long hugeTableLowerBoundSizeInBytes = 0;
@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS,
flag = VariableMgr.GLOBAL,
description = {"控制对大表的自动ANALYZE的最小时间间隔,"
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 50042e4610f..e062e4eef85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -61,7 +61,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
- if (partitionNames.isEmpty()) {
+ if (partitionNames == null || partitionNames.isEmpty()) {
+ if (partitionNames == null) {
+ LOG.warn("Table {}.{}.{}, partitionNames for column {} is
null. ColToPartitions:[{}]",
+ info.catalogId, info.dbId, info.tblId, info.colName,
info.colToPartitions);
+ }
StatsId statsId = new StatsId(concatColumnStatsId(),
info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 111305b03ed..3d6d2fe52aa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -86,7 +86,7 @@ public class StatisticConstants {
public static final int INSERT_MERGE_ITEM_COUNT = 200;
public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
- public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024
* 1024 * 1024;
+ public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS =
TimeUnit.HOURS.toMillis(12);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 244b1059d7e..ee50471175d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -148,7 +148,7 @@ public class StatisticsAutoCollector extends
StatisticsCollector {
protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
List<AnalysisInfo> analysisInfos, TableIf table) {
- AnalysisMethod analysisMethod = table.getDataSize(true) >
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
+ AnalysisMethod analysisMethod = table.getDataSize(true) >=
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index a569a5cb06d..12a1a9c046b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -86,7 +86,7 @@ public class HMSAnalysisTaskTest {
new MockUp<HMSExternalTable>() {
@Mock
public long getDataSize(boolean singleReplica) {
- return 1000;
+ return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};
HMSAnalysisTask task = new HMSAnalysisTask();
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 5b27c79c863..e0b5a4b0478 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -101,7 +101,7 @@ public class OlapAnalysisTaskTest {
@Mock
public long getDataSize(boolean singleReplica) {
- return 1000;
+ return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 9f6d3db74b5..0b4b2203d0d 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -346,7 +346,7 @@ public class StatisticsAutoCollectorTest {
@Mock
public long getDataSize(boolean singleReplica) {
- return 1000;
+ return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
@Mock
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]