This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 80a5692aa8c [statistic](cloud) Support force full analysis to sample (#32031)
80a5692aa8c is described below

commit 80a5692aa8c35ebc4e39f97f4d7e69412d09e3b3
Author: Gavin Chou <[email protected]>
AuthorDate: Sun Mar 10 17:40:11 2024 +0800

    [statistic](cloud) Support force full analysis to sample (#32031)
    
    Full analysis may take a lot of time due to lack of file cache in cloud
    mode, which may lead to analysis timeout when we run regression test.
    
    We can force sample analysis for the analysis statements such as
    `analyze table $table with sync` to get rid of analysis timeout.
---
 .../src/main/java/org/apache/doris/common/Config.java  |  3 +++
 .../org/apache/doris/analysis/AnalyzeProperties.java   |  4 ++++
 .../java/org/apache/doris/analysis/AnalyzeStmt.java    | 18 ++++++++++++++++++
 .../org/apache/doris/statistics/AnalysisManager.java   |  3 +++
 4 files changed, 28 insertions(+)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index aa935200e25..22d41e8664a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2259,6 +2259,9 @@ public class Config extends ConfigBase {
     @ConfField
     public static int cpu_resource_limit_per_analyze_task = 1;
 
+    @ConfField(mutable = true)
+    public static boolean force_sample_analyze = false; // avoid full analyze for performance reason
+
     @ConfField(mutable = true, description = {
             "Export任务允许的最大分区数量",
             "The maximum number of partitions allowed by Export job"})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
index 4b5f161d2be..94083989ca7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
@@ -123,6 +123,10 @@ public class AnalyzeProperties {
         return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS));
     }
 
+    public void setSampleRows(long sampleRows) {
+        properties.put(PROPERTY_SAMPLE_ROWS, String.valueOf(sampleRows));
+    }
+
     public int getNumBuckets() {
         if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) {
             return 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index 2c4a51757cd..00226ce5803 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -18,16 +18,21 @@
 package org.apache.doris.analysis;
 
 
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisMode;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
 import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.core.util.CronExpression;
 
 import java.util.Map;
 
 public class AnalyzeStmt extends StatementBase {
+    private static final Logger LOG = LogManager.getLogger(AnalyzeStmt.class);
 
     protected AnalyzeProperties analyzeProperties;
 
@@ -35,6 +40,19 @@ public class AnalyzeStmt extends StatementBase {
         this.analyzeProperties = analyzeProperties;
     }
 
+    public void checkAndSetSample() throws AnalysisException {
+        if (analyzeProperties.forceFull()) {
+            // if the user trys hard to do full, we stop him hard.
+            throw new AnalysisException(
+                    "analyze with full is forbidden for performance issue in cloud mode, use `with sample` then");
+        }
+        if (!analyzeProperties.isSample()) {
+            // otherwise, we gently translate it to use sample
+            LOG.warn("analyze with full is forbidden for performance issue in cloud mode, force to use sample");
+            analyzeProperties.setSampleRows(StatisticsUtil.getHugeTableSampleRows());
+        }
+    }
+
     public Map<String, String> getProperties() {
         return analyzeProperties.getProperties();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 6bdf6fdb771..f90ce83a088 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -144,6 +144,9 @@ public class AnalysisManager implements Writable {
         if (!StatisticsUtil.statsTblAvailable() && !FeConstants.runningUnitTest) {
             throw new DdlException("Stats table not available, please make sure your cluster status is normal");
         }
+        if (Config.force_sample_analyze) {
+            analyzeStmt.checkAndSetSample();
+        }
         if (analyzeStmt instanceof AnalyzeDBStmt) {
             createAnalysisJobs((AnalyzeDBStmt) analyzeStmt, proxy);
         } else if (analyzeStmt instanceof AnalyzeTblStmt) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to