This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bc1f6b32b94 branch-3.0: [improvement](statistics)Add session variable for partition sample count. #48218 (#49091)
bc1f6b32b94 is described below

commit bc1f6b32b94081ee52fb7351e6d1f4bd9802f91d
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Mar 14 20:29:04 2025 +0800

    branch-3.0: [improvement](statistics)Add session variable for partition sample count. #48218 (#49091)
    
    Cherry-picked from #48218
    
    Co-authored-by: James <[email protected]>
---
 .../main/java/org/apache/doris/qe/SessionVariable.java | 14 ++++++++++++++
 .../org/apache/doris/statistics/OlapAnalysisTask.java  |  4 ++--
 .../apache/doris/statistics/StatisticConstants.java    |  4 ++++
 .../apache/doris/statistics/util/StatisticsUtil.java   | 18 ++++++++++++++++++
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 666bcd43b15..326f75aba3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -557,6 +557,8 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 
"huge_table_default_sample_rows";
     public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 
"huge_table_lower_bound_size_in_bytes";
+    public static final String PARTITION_SAMPLE_COUNT = 
"partition_sample_count";
+    public static final String PARTITION_SAMPLE_ROW_COUNT = 
"partition_sample_row_count";
 
     // for spill to disk
     public static final String EXTERNAL_SORT_BYTES_THRESHOLD = 
"external_sort_bytes_threshold";
@@ -2007,6 +2009,18 @@ public class SessionVariable implements Serializable, 
Writable {
                             + "considered outdated."})
     public int tableStatsHealthThreshold = 90;
 
+    @VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag = 
VariableMgr.GLOBAL,
+            description = {
+                    "大分区表采样的分区数上限",
+                    "The upper limit of the number of partitions for sampling 
large partitioned tables.\n"})
+    public int partitionSampleCount = 30;
+
+    @VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag = 
VariableMgr.GLOBAL,
+            description = {
+                    "大分区表采样的行数上限",
+                    "The upper limit of the number of rows for sampling large 
partitioned tables.\n"})
+    public long partitionSampleRowCount = 3_000_000_000L;
+
     @VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward 
= true,
             description = {"是否开启基于结构信息的物化视图透明改写",
                     "Whether to enable materialized view rewriting based on 
struct info"})
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index f0a55f9b54e..ba2b2770a5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -63,7 +63,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
     private boolean partitionColumnSampleTooManyRows = false;
     private boolean scanFullTable = false;
     private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000;
-    private static final int PARTITION_COUNT_TO_SAMPLE = 5;
 
     @VisibleForTesting
     public OlapAnalysisTask() {
@@ -386,7 +385,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                 MaterializedIndex materializedIndex = p.getIndex(indexId);
                 
pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder());
             }
-            if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount > 
PARTITION_COUNT_TO_SAMPLE) {
+            if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount()
+                    || pickedPartitionCount >= 
StatisticsUtil.getPartitionSampleCount()) {
                 break;
             }
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index f6d49ea079b..414a590fb77 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -103,6 +103,10 @@ public class StatisticConstants {
 
     public static final int MSG_LEN_UPPER_BOUND = 1024;
 
+    public static final int PARTITION_SAMPLE_COUNT = 30;
+
+    public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L;
+
     static {
         SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME);
         SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 6b50cd32a1e..b482e5c225c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -943,6 +943,24 @@ public class StatisticsUtil {
         return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
     }
 
+    public static int getPartitionSampleCount() {
+        try {
+            return 
findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT).partitionSampleCount;
+        } catch (Exception e) {
+            LOG.warn("Fail to get value of partition_sample_count, return 
default", e);
+        }
+        return StatisticConstants.PARTITION_SAMPLE_COUNT;
+    }
+
+    public static long getPartitionSampleRowCount() {
+        try {
+            return 
findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT).partitionSampleRowCount;
+        } catch (Exception e) {
+            LOG.warn("Fail to get value of partition_sample_row_count, return 
default", e);
+        }
+        return StatisticConstants.PARTITION_SAMPLE_ROW_COUNT;
+    }
+
     public static String encodeValue(ResultRow row, int index) {
         if (row == null || row.getValues().size() <= index) {
             return "NULL";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to