This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new bcaa6459ecb branch-4.0: [improve](partition) Increase partition limit
defaults to 20000 and add near-limit metrics #61511 (#61764)
bcaa6459ecb is described below
commit bcaa6459ecbd67db9df3c0452bfe70fa92e1e7d2
Author: Yongqiang YANG <[email protected]>
AuthorDate: Thu Mar 26 04:11:34 2026 -0700
branch-4.0: [improve](partition) Increase partition limit defaults to 20000
and add near-limit metrics #61511 (#61764)
## Summary
Cherry-pick of #61511 to branch-4.0.
- Raise `max_dynamic_partition_num` default from 500 to 20000 and
`max_auto_partition_num` from 2000 to 20000 to match modern production
workloads
- Add warning logs when partition counts exceed 80% of their configured
limits, enabling proactive detection before hard failures
- Add Prometheus counter metrics (`auto_partition_near_limit_count`,
`dynamic_partition_near_limit_count`) for monitoring/alerting
## Conflict Resolution
- `Config.java`: Trivial context conflict in `max_auto_partition_num`
description formatting — resolved by taking the incoming change (20000
default + updated English description).
## Test plan
- [ ] Verify existing dynamic partition tests pass with new default
- [ ] Verify auto-partition limit check still errors correctly when
exceeded
- [ ] Verify warning logs appear when partition count is between
80% and 100% of the limit
- [ ] Verify new metrics appear in `/metrics` Prometheus endpoint
Co-authored-by: Claude Opus 4.6 <[email protected]>
Co-authored-by: dataroaring <[email protected]>
---
.../src/main/java/org/apache/doris/common/Config.java | 7 +++----
.../apache/doris/common/util/DynamicPartitionUtil.java | 17 ++++++++++++++---
.../main/java/org/apache/doris/metric/MetricRepo.java | 14 ++++++++++++++
.../org/apache/doris/service/FrontendServiceImpl.java | 14 ++++++++++++--
4 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 41d47714a80..921796670c4 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1670,7 +1670,7 @@ public class Config extends ConfigBase {
* The number is determined by "start" and "end" in the dynamic partition
parameters.
*/
@ConfField(mutable = true, masterOnly = true)
- public static int max_dynamic_partition_num = 500;
+ public static int max_dynamic_partition_num = 20000;
/**
* Used to limit the maximum number of partitions that can be created when
creating multi partition,
@@ -2966,9 +2966,8 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true, description = {
"对于自动分区表,防止用户意外创建大量分区,每个 OLAP 表允许的分区数量为`max_auto_partition_num`。默认
2000。",
"For auto-partitioned tables to prevent users from accidentally
creating a large number of partitions, "
- + "the number of partitions allowed per OLAP table is
`max_auto_partition_num`. Default 2000."
- })
- public static int max_auto_partition_num = 2000;
+ + "the number of partitions allowed per OLAP table is
`max_auto_partition_num`. Default 20000."})
+ public static int max_auto_partition_num = 20000;
@ConfField(mutable = true, masterOnly = true, description = {
"Partition rebalance 方式下各个 BE 的 tablet 数最大差值,小于该值时,会诊断为已均衡",
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
index db12f6266ea..516d6942478 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
@@ -42,6 +42,7 @@ import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
+import org.apache.doris.metric.MetricRepo;
import org.apache.doris.policy.StoragePolicy;
import org.apache.doris.resource.Tag;
import org.apache.doris.thrift.TStorageMedium;
@@ -641,10 +642,20 @@ public class DynamicPartitionUtil {
}
expectCreatePartitionNum = (long) end - start;
- if (!isReplay && hasEnd && (expectCreatePartitionNum >
Config.max_dynamic_partition_num)
+ int dynamicPartitionLimit = Config.max_dynamic_partition_num;
+ if (!isReplay && hasEnd
&&
Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE,
"true"))) {
- throw new DdlException("Too many dynamic partitions: "
- + expectCreatePartitionNum + ". Limit: " +
Config.max_dynamic_partition_num);
+ if (expectCreatePartitionNum > dynamicPartitionLimit) {
+ throw new DdlException("Too many dynamic partitions: "
+ + expectCreatePartitionNum + ". Limit: " +
dynamicPartitionLimit);
+ } else if (expectCreatePartitionNum > dynamicPartitionLimit * 8L /
10) {
+ LOG.warn("Dynamic partition count {} is approaching limit {}
(>80%)."
+ + " Consider increasing max_dynamic_partition_num.",
+ expectCreatePartitionNum, dynamicPartitionLimit);
+ if (MetricRepo.isInit) {
+
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
+ }
+ }
}
if
(properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 6479575b64d..3ec8a97ae83 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -249,6 +249,10 @@ public final class MetricRepo {
public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;
+ // Partition near-limit warnings
+ public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
+ public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;
+
// Agent task
public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
public static AutoMappedMetric<LongCounterMetric> COUNTER_AGENT_TASK_TOTAL;
@@ -1002,6 +1006,16 @@ public final class MetricRepo {
GAUGE_AVG_TABLET_SIZE_BYTES = new
GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);
+ // Partition near-limit warning counters
+ COUNTER_AUTO_PARTITION_NEAR_LIMIT = new
LongCounterMetric("auto_partition_near_limit_count",
+ MetricUnit.NOUNIT,
+ "number of times auto partition count exceeded 80% of
max_auto_partition_num");
+ DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
+ COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new
LongCounterMetric("dynamic_partition_near_limit_count",
+ MetricUnit.NOUNIT,
+ "number of times dynamic partition count exceeded 80% of
max_dynamic_partition_num");
+ DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);
+
COUNTER_AGENT_TASK_REQUEST_TOTAL = new
LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
"total agent batch task request send to BE");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
index 0bc4942ebf8..5a025524906 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
@@ -98,6 +98,7 @@ import
org.apache.doris.load.routineload.RoutineLoadJob.JobState;
import org.apache.doris.load.routineload.RoutineLoadManager;
import org.apache.doris.master.MasterImpl;
import org.apache.doris.meta.MetaContext;
+import org.apache.doris.metric.MetricRepo;
import org.apache.doris.mysql.privilege.AccessControllerManager;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
@@ -3832,15 +3833,24 @@ public class FrontendServiceImpl implements
FrontendService.Iface {
// check partition's number limit. because partitions in
addPartitionClauseMap may be duplicated with existing
// partitions, which would lead to false positive. so we should check
the partition number AFTER adding new
// partitions using its ACTUAL NUMBER, rather than the sum of existing
and requested partitions.
- if (olapTable.getPartitionNum() > Config.max_auto_partition_num) {
+ int partitionNum = olapTable.getPartitionNum();
+ int autoPartitionLimit = Config.max_auto_partition_num;
+ if (partitionNum > autoPartitionLimit) {
String errorMessage = String.format(
"partition numbers %d exceeded limit of variable
max_auto_partition_num %d",
- olapTable.getPartitionNum(),
Config.max_auto_partition_num);
+ partitionNum, autoPartitionLimit);
LOG.warn(errorMessage);
errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage));
result.setStatus(errorStatus);
LOG.warn("send create partition error status: {}", result);
return result;
+ } else if (partitionNum > autoPartitionLimit * 8 / 10) {
+ LOG.warn("Table {}.{} auto partition count {} is approaching limit
{} (>80%)."
+ + " Consider increasing max_auto_partition_num.",
+ db.getFullName(), olapTable.getName(), partitionNum,
autoPartitionLimit);
+ if (MetricRepo.isInit) {
+ MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
+ }
}
// build partition & tablets
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]