This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new bc1f6b32b94 branch-3.0: [improvement](statistics)Add session variable
for partition sample count. #48218 (#49091)
bc1f6b32b94 is described below
commit bc1f6b32b94081ee52fb7351e6d1f4bd9802f91d
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Mar 14 20:29:04 2025 +0800
branch-3.0: [improvement](statistics)Add session variable for partition
sample count. #48218 (#49091)
Cherry-picked from #48218
Co-authored-by: James <[email protected]>
---
.../main/java/org/apache/doris/qe/SessionVariable.java | 14 ++++++++++++++
.../org/apache/doris/statistics/OlapAnalysisTask.java | 4 ++--
.../apache/doris/statistics/StatisticConstants.java | 4 ++++
.../apache/doris/statistics/util/StatisticsUtil.java | 18 ++++++++++++++++++
4 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 666bcd43b15..326f75aba3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -557,6 +557,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS =
"huge_table_default_sample_rows";
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES =
"huge_table_lower_bound_size_in_bytes";
+ public static final String PARTITION_SAMPLE_COUNT =
"partition_sample_count";
+ public static final String PARTITION_SAMPLE_ROW_COUNT =
"partition_sample_row_count";
// for spill to disk
public static final String EXTERNAL_SORT_BYTES_THRESHOLD =
"external_sort_bytes_threshold";
@@ -2007,6 +2009,18 @@ public class SessionVariable implements Serializable, Writable {
+ "considered outdated."})
public int tableStatsHealthThreshold = 90;
+ @VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag =
VariableMgr.GLOBAL,
+ description = {
+ "大分区表采样的分区数上限",
+ "The upper limit of the number of partitions for sampling
large partitioned tables.\n"})
+ public int partitionSampleCount = 30;
+
+ @VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag =
VariableMgr.GLOBAL,
+ description = {
+ "大分区表采样的行数上限",
+ "The upper limit of the number of rows for sampling large
partitioned tables.\n"})
+ public long partitionSampleRowCount = 3_000_000_000L;
+
@VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward
= true,
description = {"是否开启基于结构信息的物化视图透明改写",
"Whether to enable materialized view rewriting based on
struct info"})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index f0a55f9b54e..ba2b2770a5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -63,7 +63,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
private boolean partitionColumnSampleTooManyRows = false;
private boolean scanFullTable = false;
private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000;
- private static final int PARTITION_COUNT_TO_SAMPLE = 5;
@VisibleForTesting
public OlapAnalysisTask() {
@@ -386,7 +385,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
MaterializedIndex materializedIndex = p.getIndex(indexId);
pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder());
}
- if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount >
PARTITION_COUNT_TO_SAMPLE) {
+ if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount()
+ || pickedPartitionCount >=
StatisticsUtil.getPartitionSampleCount()) {
break;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index f6d49ea079b..414a590fb77 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -103,6 +103,10 @@ public class StatisticConstants {
public static final int MSG_LEN_UPPER_BOUND = 1024;
+ public static final int PARTITION_SAMPLE_COUNT = 30;
+
+ public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L;
+
static {
SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME);
SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 6b50cd32a1e..b482e5c225c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -943,6 +943,24 @@ public class StatisticsUtil {
return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
}
+ public static int getPartitionSampleCount() {
+ try {
+ return
findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT).partitionSampleCount;
+ } catch (Exception e) {
+ LOG.warn("Fail to get value of partition_sample_count, return
default", e);
+ }
+ return StatisticConstants.PARTITION_SAMPLE_COUNT;
+ }
+
+ public static long getPartitionSampleRowCount() {
+ try {
+ return
findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT).partitionSampleRowCount;
+ } catch (Exception e) {
+ LOG.warn("Fail to get value of partition_sample_row_count, return
default", e);
+ }
+ return StatisticConstants.PARTITION_SAMPLE_ROW_COUNT;
+ }
+
public static String encodeValue(ResultRow row, int index) {
if (row == null || row.getValues().size() <= index) {
return "NULL";
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]