This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 51fa2202b10 [improve](partition) Increase partition limit defaults to
20000 and add near-limit metrics (#61511)
51fa2202b10 is described below
commit 51fa2202b10beaad5ba2ba47acb22fd0208393a6
Author: Yongqiang YANG <[email protected]>
AuthorDate: Fri Mar 20 02:41:43 2026 -0700
[improve](partition) Increase partition limit defaults to 20000 and add
near-limit metrics (#61511)
## Summary
- Raise `max_dynamic_partition_num` default from 500 to 20000 and
`max_auto_partition_num` from 2000 to 20000 to match modern production
workloads
- Add warning logs when partition counts exceed 80% of their configured
limits, enabling proactive detection before hard failures
- Add Prometheus counter metrics (`auto_partition_near_limit_count`,
`dynamic_partition_near_limit_count`) for monitoring/alerting
## Test plan
- [ ] Verify existing dynamic partition tests pass with new default
(tests explicitly set config values, so unaffected)
- [ ] Verify auto-partition limit check still errors correctly when
exceeded
- [ ] Verify warning logs appear when partition count is between 80% and
100% of the limit
- [ ] Verify new metrics appear in `/metrics` Prometheus endpoint
- [ ] Test Prometheus alert rule:
`rate(doris_fe_auto_partition_near_limit_count[5m]) > 0`
🤖 Generated with [Claude Code](https://claude.com/claude-code)
---------
Co-authored-by: Claude Opus 4.6 <[email protected]>
Co-authored-by: dataroaring <[email protected]>
---
.../src/main/java/org/apache/doris/common/Config.java | 6 +++---
.../apache/doris/common/util/DynamicPartitionUtil.java | 17 ++++++++++++++---
.../main/java/org/apache/doris/metric/MetricRepo.java | 14 ++++++++++++++
.../org/apache/doris/service/FrontendServiceImpl.java | 14 ++++++++++++--
4 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index a25b65fca27..04dd9a2d610 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1521,7 +1521,7 @@ public class Config extends ConfigBase {
* The number is determined by "start" and "end" in the dynamic partition
parameters.
*/
@ConfField(mutable = true, masterOnly = true)
- public static int max_dynamic_partition_num = 500;
+ public static int max_dynamic_partition_num = 20000;
/**
* Used to limit the maximum number of partitions that can be created when
creating multi partition,
@@ -2690,8 +2690,8 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true, description = {
"For auto-partitioned tables to prevent users from accidentally
creating a large number of partitions, "
- + "the number of partitions allowed per OLAP table is
`max_auto_partition_num`. Default 2000."})
- public static int max_auto_partition_num = 2000;
+ + "the number of partitions allowed per OLAP table is
`max_auto_partition_num`. Default 20000."})
+ public static int max_auto_partition_num = 20000;
@ConfField(mutable = true, masterOnly = true, description = {
"The maximum difference in the number of tablets of each BE in
partition rebalance mode. "
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
index db12f6266ea..516d6942478 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
@@ -42,6 +42,7 @@ import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
+import org.apache.doris.metric.MetricRepo;
import org.apache.doris.policy.StoragePolicy;
import org.apache.doris.resource.Tag;
import org.apache.doris.thrift.TStorageMedium;
@@ -641,10 +642,20 @@ public class DynamicPartitionUtil {
}
expectCreatePartitionNum = (long) end - start;
- if (!isReplay && hasEnd && (expectCreatePartitionNum >
Config.max_dynamic_partition_num)
+ int dynamicPartitionLimit = Config.max_dynamic_partition_num;
+ if (!isReplay && hasEnd
&&
Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE,
"true"))) {
- throw new DdlException("Too many dynamic partitions: "
- + expectCreatePartitionNum + ". Limit: " +
Config.max_dynamic_partition_num);
+ if (expectCreatePartitionNum > dynamicPartitionLimit) {
+ throw new DdlException("Too many dynamic partitions: "
+ + expectCreatePartitionNum + ". Limit: " +
dynamicPartitionLimit);
+ } else if (expectCreatePartitionNum > dynamicPartitionLimit * 8L /
10) {
+ LOG.warn("Dynamic partition count {} is approaching limit {}
(>80%)."
+ + " Consider increasing max_dynamic_partition_num.",
+ expectCreatePartitionNum, dynamicPartitionLimit);
+ if (MetricRepo.isInit) {
+
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
+ }
+ }
}
if
(properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index cd0755fd9da..c28c2aeb99f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -256,6 +256,10 @@ public final class MetricRepo {
public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;
+ // Partition near-limit warnings
+ public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
+ public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;
+
// Agent task
public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
public static AutoMappedMetric<LongCounterMetric> COUNTER_AGENT_TASK_TOTAL;
@@ -1040,6 +1044,16 @@ public final class MetricRepo {
GAUGE_AVG_TABLET_SIZE_BYTES = new
GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);
+ // Partition near-limit warning counters
+ COUNTER_AUTO_PARTITION_NEAR_LIMIT = new
LongCounterMetric("auto_partition_near_limit_count",
+ MetricUnit.NOUNIT,
+ "number of times auto partition count exceeded 80% of
max_auto_partition_num");
+ DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
+ COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new
LongCounterMetric("dynamic_partition_near_limit_count",
+ MetricUnit.NOUNIT,
+ "number of times dynamic partition count exceeded 80% of
max_dynamic_partition_num");
+ DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);
+
COUNTER_AGENT_TASK_REQUEST_TOTAL = new
LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
"total agent batch task request send to BE");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
index f074711854c..44410af0163 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
@@ -96,6 +96,7 @@ import
org.apache.doris.load.routineload.RoutineLoadJob.JobState;
import org.apache.doris.load.routineload.RoutineLoadManager;
import org.apache.doris.master.MasterImpl;
import org.apache.doris.meta.MetaContext;
+import org.apache.doris.metric.MetricRepo;
import org.apache.doris.mysql.privilege.AccessControllerManager;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
@@ -4391,15 +4392,24 @@ public class FrontendServiceImpl implements
FrontendService.Iface {
// check partition's number limit. because partitions in
addPartitionClauseMap may be duplicated with existing
// partitions, which would lead to false positive. so we should check
the partition number AFTER adding new
// partitions using its ACTUAL NUMBER, rather than the sum of existing
and requested partitions.
- if (olapTable.getPartitionNum() > Config.max_auto_partition_num) {
+ int partitionNum = olapTable.getPartitionNum();
+ int autoPartitionLimit = Config.max_auto_partition_num;
+ if (partitionNum > autoPartitionLimit) {
String errorMessage = String.format(
"partition numbers %d exceeded limit of variable
max_auto_partition_num %d",
- olapTable.getPartitionNum(),
Config.max_auto_partition_num);
+ partitionNum, autoPartitionLimit);
LOG.warn(errorMessage);
errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage));
result.setStatus(errorStatus);
LOG.warn("send create partition error status: {}", result);
return result;
+ } else if (partitionNum > autoPartitionLimit * 8 / 10) {
+ LOG.warn("Table {}.{} auto partition count {} is approaching limit
{} (>80%)."
+ + " Consider increasing max_auto_partition_num.",
+ db.getFullName(), olapTable.getName(), partitionNum,
autoPartitionLimit);
+ if (MetricRepo.isInit) {
+ MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
+ }
}
// build partition & tablets
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]