This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d7a5c37672 [improvement](tablet clone) update the capacity coeficient
for calculating backend load score (#22857)
d7a5c37672 is described below
commit d7a5c376727fdd03c94884e6d88676c07c7f2f50
Author: yujun <[email protected]>
AuthorDate: Tue Aug 15 17:27:31 2023 +0800
[improvement](tablet clone) update the capacity coeficient for calculating
backend load score (#22857)
Update the capacity coefficient used for calculating the backend load score:
1. Add the FE config entry `backend_load_capacity_coeficient` to allow setting
the capacity coefficient manually;
2. Adjust how the capacity coefficient is calculated, as described below.
We emphasize disk usage for calculating load score.
If a BE has a high used capacity percent, we should increase its load score.
So we increase the capacity coefficient with the BE's used capacity percent.
But this is not enough. For example, if the tablets differ greatly in data
size, then the two BEs below may end up with the same load score:
BE A: disk usage = 60%, replica number = 2000 (it contains the big
tablets)
BE B: disk usage = 30%, replica number = 4000 (it contains the small
tablets)
But what we want is: first move some big tablets from A to B; once their
disk usages are close, move some small tablets from B to A, so that finally
both their disk usages and their replica numbers are close.
To achieve this, when the max difference between all BEs' disk usages is >=
30% (the default of `used_capacity_percent_max_diff`), we set the capacity
coefficient to 1.0 to remove the effect of the replica number. As the disk
usage difference decreases, the capacity coefficient is decreased as well, so
that the replica number becomes effective again.
---
.../main/java/org/apache/doris/common/Config.java | 14 ++++
.../apache/doris/clone/BackendLoadStatistic.java | 87 ++++++++++++++++++----
.../apache/doris/clone/LoadStatisticForTag.java | 30 ++++++--
.../org/apache/doris/clone/DiskRebalanceTest.java | 1 +
4 files changed, 111 insertions(+), 21 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 53b00dcefa..7ed352eff5 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -614,6 +614,20 @@ public class Config extends ConfigBase {
"The high water of disk capacity used percent. This is used for
calculating load score of a backend."})
public static double capacity_used_percent_high_water = 0.75;
+ @ConfField(mutable = true, masterOnly = true, description = {
+ "负载均衡时,磁盘使用率最大差值。",
+ "The max diff of disk capacity used percent between BE. "
+ + "It is used for calculating load score of a backend."})
+ public static double used_capacity_percent_max_diff = 0.30;
+
+ @ConfField(mutable = true, masterOnly = true, description = {
+ "设置固定的 BE 负载分数中磁盘使用率系数。BE 负载分数会综合磁盘使用率和副本数而得。有效值范围为[0, 1],"
+ + "当超出此范围时,则使用其他方法自动计算此系数。",
+ "Sets a fixed disk usage factor in the BE load fraction. The BE
load score is a combination of disk usage "
+ + "and replica count. The valid value range is [0, 1].
When it is out of this range, other "
+ + "methods are used to automatically calculate this
coefficient."})
+ public static double backend_load_capacity_coeficient = -1.0;
+
@ConfField(mutable = true, masterOnly = true, description = {
"ALTER TABLE 请求的最大超时时间。设置的足够长以适应表的数据量。",
"Maximal timeout of ALTER TABLE request. Set long enough to fit
your table data size."})
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index f1391b26d2..fdf45afe81 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -87,11 +87,14 @@ public class BackendLoadStatistic {
private Tag tag;
public static class LoadScore {
- public double replicaNumCoefficient = 0.5;
public double capacityCoefficient = 0.5;
public double score = 0.0;
public static final LoadScore DUMMY = new LoadScore();
+
+ public double getReplicaNumCoefficient() {
+ return 1.0 - capacityCoefficient;
+ }
}
private Map<TStorageMedium, Long> totalCapacityMap = Maps.newHashMap();
@@ -244,24 +247,27 @@ public class BackendLoadStatistic {
}
public void calcScore(Map<TStorageMedium, Double>
avgClusterUsedCapacityPercentMap,
+ Map<TStorageMedium, Double> maxUsedPercentDiffMap,
Map<TStorageMedium, Double> avgClusterReplicaNumPerBackendMap) {
for (TStorageMedium medium : TStorageMedium.values()) {
- LoadScore loadScore =
calcSore(totalUsedCapacityMap.getOrDefault(medium, 0L),
+ LoadScore loadScore =
calcScore(totalUsedCapacityMap.getOrDefault(medium, 0L),
totalCapacityMap.getOrDefault(medium, 1L),
totalReplicaNumMap.getOrDefault(medium, 0L),
avgClusterUsedCapacityPercentMap.getOrDefault(medium, 0.0),
+ maxUsedPercentDiffMap.getOrDefault(medium, 0.0),
avgClusterReplicaNumPerBackendMap.getOrDefault(medium,
0.0));
loadScoreMap.put(medium, loadScore);
LOG.debug("backend {}, medium: {}, capacity coefficient: {},
replica coefficient: {}, load score: {}",
- beId, medium, loadScore.capacityCoefficient,
loadScore.replicaNumCoefficient, loadScore.score);
+ beId, medium, loadScore.capacityCoefficient,
loadScore.getReplicaNumCoefficient(),
+ loadScore.score);
}
}
- public static LoadScore calcSore(long beUsedCapacityB, long
beTotalCapacityB, long beTotalReplicaNum,
- double avgClusterUsedCapacityPercent, double
avgClusterReplicaNumPerBackend) {
+ public static LoadScore calcScore(long beUsedCapacityB, long
beTotalCapacityB, long beTotalReplicaNum,
+ double avgClusterUsedCapacityPercent, double maxUsedPercentDiff,
double avgClusterReplicaNumPerBackend) {
double usedCapacityPercent = (beUsedCapacityB / (double)
beTotalCapacityB);
double capacityProportion = avgClusterUsedCapacityPercent <= 0 ? 0.0
@@ -271,22 +277,71 @@ public class BackendLoadStatistic {
LoadScore loadScore = new LoadScore();
- // If this backend's capacity used percent < 50%, set
capacityCoefficient to 0.5.
- // Else if capacity used percent > 75%, set capacityCoefficient to 1.
- // Else, capacityCoefficient changed smoothly from 0.5 to 1 with used
capacity increasing
- // Function: (2 * usedCapacityPercent - 0.5)
- if (!Config.be_rebalancer_fuzzy_test) {
- loadScore.capacityCoefficient = usedCapacityPercent < 0.5 ? 0.5
- : (usedCapacityPercent >
Config.capacity_used_percent_high_water ? 1.0
- : (2 * usedCapacityPercent - 0.5));
- loadScore.replicaNumCoefficient = 1 -
loadScore.capacityCoefficient;
+ if (Config.backend_load_capacity_coeficient >= 0.0 &&
Config.backend_load_capacity_coeficient <= 1.0) {
+ loadScore.capacityCoefficient =
Config.backend_load_capacity_coeficient;
+ } else if (!Config.be_rebalancer_fuzzy_test) {
+ // Emphasize disk usage for calculating load score.
+ //
+ // thisBeCapacityCoefficient:
+ // If this be has a high used capacity percent, we should
increase its load score.
+ // So we increase capacity coefficient with this be's used
capacity percent.
+ //
+ // Also we emphasize the difference of disk usage between all the
BEs.
+ // For example, if the tablets have a big difference in data size.
+ // Then for below two BEs, their load score maybe the same:
+ // BE A: disk usage = 60%, replica number = 2000 (it contains
the big tablets)
+ // BE B: disk usage = 30%, replica number = 4000 (it contains
the small tablets)
+ //
+ // But what we want is: firstly move some big tablets from A to B,
after their disk usages are close,
+ // then move some small tablets from B to A, finally both of their
disk usages and replica number
+ // are close.
+ //
+ // allBeCapacityCoefficient is used to achieve this, when the max
difference between all BE's
+ // disk usages >= 30%, we set the capacity cofficient to 1.0 and
avoid the affect of replica num.
+ // After the disk usage difference decrease, then decrease the
capacity cofficient.
+ //
+ double thisBeCapacityCoefficient =
getSmoothCofficient(usedCapacityPercent, 0.5,
+ Config.capacity_used_percent_high_water);
+ double allBeCapacityCoefficient =
getSmoothCofficient(maxUsedPercentDiff, 0.05,
+ Config.used_capacity_percent_max_diff);
+
+ loadScore.capacityCoefficient =
Math.max(thisBeCapacityCoefficient, allBeCapacityCoefficient);
}
loadScore.score = capacityProportion * loadScore.capacityCoefficient
- + replicaNumProportion * loadScore.replicaNumCoefficient;
+ + replicaNumProportion * loadScore.getReplicaNumCoefficient();
return loadScore;
}
+ // the return cofficient:
+ // 1. percent <= percentLowWatermark: cofficient = 0.5;
+ // 2. percentLowWatermark < percent < percentHighWatermark: cofficient
linear increase from 0.5 to 1.0;
+ // 3. percent >= percentHighWatermark: cofficient = 1.0;
+ public static double getSmoothCofficient(double percent, double
percentLowWatermark,
+ double percentHighWatermark) {
+ final double lowCofficient = 0.5;
+ final double highCofficient = 1.0;
+
+ // low watermark and high watermark equal, then return 0.75
+ if (Math.abs(percentHighWatermark - percentLowWatermark) < 1e-6) {
+ return (lowCofficient + highCofficient) / 2;
+ }
+ if (percentLowWatermark > percentHighWatermark) {
+ double tmp = percentLowWatermark;
+ percentLowWatermark = percentHighWatermark;
+ percentHighWatermark = tmp;
+ }
+ if (percent <= percentLowWatermark) {
+ return lowCofficient;
+ }
+ if (percent >= percentHighWatermark) {
+ return highCofficient;
+ }
+
+ return lowCofficient + (highCofficient - lowCofficient)
+ * (percent - percentLowWatermark) / (percentHighWatermark -
percentLowWatermark);
+ }
+
public BalanceStatus isFit(long tabletSize, TStorageMedium medium,
List<RootPathLoadStatistic> result, boolean isSupplement) {
BalanceStatus status = new BalanceStatus(ErrCode.COMMON_ERROR);
@@ -444,7 +499,7 @@ public class BackendLoadStatistic {
info.add(String.valueOf(totalReplicaNumMap.getOrDefault(medium, 0L)));
LoadScore loadScore = loadScoreMap.getOrDefault(medium, new
LoadScore());
info.add(String.valueOf(loadScore.capacityCoefficient));
- info.add(String.valueOf(loadScore.replicaNumCoefficient));
+ info.add(String.valueOf(loadScore.getReplicaNumCoefficient()));
info.add(String.valueOf(loadScore.score));
info.add(clazzMap.getOrDefault(medium, Classification.INIT).name());
return info;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
index 4709575c69..224399f480 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
@@ -53,6 +53,7 @@ public class LoadStatisticForTag {
private final Map<TStorageMedium, Long> totalCapacityMap =
Maps.newHashMap();
private final Map<TStorageMedium, Long> totalUsedCapacityMap =
Maps.newHashMap();
private final Map<TStorageMedium, Long> totalReplicaNumMap =
Maps.newHashMap();
+ private final Map<TStorageMedium, Double> maxUsedPercentDiffMap =
Maps.newHashMap();
private final Map<TStorageMedium, Double> avgUsedCapacityPercentMap =
Maps.newHashMap();
private final Map<TStorageMedium, Double> avgReplicaNumPercentMap =
Maps.newHashMap();
private final Map<TStorageMedium, Double> avgLoadScoreMap =
Maps.newHashMap();
@@ -116,10 +117,27 @@ public class LoadStatisticForTag {
/ (double) totalCapacityMap.getOrDefault(medium, 1L));
avgReplicaNumPercentMap.put(medium,
totalReplicaNumMap.getOrDefault(medium, 0L)
/ (double) backendNumMap.getOrDefault(medium, 1));
+
+ double maxUsedPercent = -1.0;
+ double minUsedPercent = -1.0;
+ for (BackendLoadStatistic beStatistic : beLoadStatistics) {
+ long beTotalCapacityB = beStatistic.getTotalCapacityB(medium);
+ long beTotalUsedCapacityB =
beStatistic.getTotalUsedCapacityB(medium);
+ if (beTotalCapacityB > 0) {
+ double beUsedPercent = ((double) beTotalUsedCapacityB) /
beTotalCapacityB;
+ if (maxUsedPercent < 0.0 || beUsedPercent >
maxUsedPercent) {
+ maxUsedPercent = beUsedPercent;
+ }
+ if (minUsedPercent < 0.0 || beUsedPercent <
minUsedPercent) {
+ minUsedPercent = beUsedPercent;
+ }
+ }
+ }
+ maxUsedPercentDiffMap.put(medium, maxUsedPercent - minUsedPercent);
}
for (BackendLoadStatistic beStatistic : beLoadStatistics) {
- beStatistic.calcScore(avgUsedCapacityPercentMap,
avgReplicaNumPercentMap);
+ beStatistic.calcScore(avgUsedCapacityPercentMap,
maxUsedPercentDiffMap, avgReplicaNumPercentMap);
}
// classify all backends
@@ -250,13 +268,15 @@ public class LoadStatisticForTag {
currentSrcBeScore = srcBeStat.getLoadScore(medium);
currentDestBeScore = destBeStat.getLoadScore(medium);
- LoadScore newSrcBeScore =
BackendLoadStatistic.calcSore(srcBeStat.getTotalUsedCapacityB(medium) -
tabletSize,
+ LoadScore newSrcBeScore =
BackendLoadStatistic.calcScore(srcBeStat.getTotalUsedCapacityB(medium) -
tabletSize,
srcBeStat.getTotalCapacityB(medium),
srcBeStat.getReplicaNum(medium) - 1,
- avgUsedCapacityPercentMap.get(medium),
avgReplicaNumPercentMap.get(medium));
+ avgUsedCapacityPercentMap.get(medium),
maxUsedPercentDiffMap.get(medium),
+ avgReplicaNumPercentMap.get(medium));
- LoadScore newDestBeScore =
BackendLoadStatistic.calcSore(destBeStat.getTotalUsedCapacityB(medium) +
tabletSize,
+ LoadScore newDestBeScore =
BackendLoadStatistic.calcScore(destBeStat.getTotalUsedCapacityB(medium) +
tabletSize,
destBeStat.getTotalCapacityB(medium),
destBeStat.getReplicaNum(medium) + 1,
- avgUsedCapacityPercentMap.get(medium),
avgReplicaNumPercentMap.get(medium));
+ avgUsedCapacityPercentMap.get(medium),
maxUsedPercentDiffMap.get(medium),
+ avgReplicaNumPercentMap.get(medium));
double currentDiff = Math.abs(currentSrcBeScore -
avgLoadScoreMap.get(medium))
+ Math.abs(currentDestBeScore - avgLoadScoreMap.get(medium));
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java
b/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java
index 0c596a6f53..9d5ffe1e6d 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java
@@ -82,6 +82,7 @@ public class DiskRebalanceTest {
@Before
public void setUp() throws Exception {
+ Config.used_capacity_percent_max_diff = 1.0;
db = new Database(1, "test db");
db.setClusterName(SystemInfoService.DEFAULT_CLUSTER);
new Expectations() {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]