yujun777 commented on code in PR #26412:
URL: https://github.com/apache/doris/pull/26412#discussion_r1381684464
##########
fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java:
##########
@@ -233,6 +233,21 @@ private boolean checkMoveCompleted(TabletMove move) {
return !bes.contains(move.fromBe) && bes.contains(move.toBe);
}
+ public boolean isNeedBalanced(LoadStatisticForTag clusterStat,
TStorageMedium medium,
+ List<Long> fromBes, List<Long> toBes) {
+ MovesCacheMap.MovesCache movesInProgress =
movesCacheMap.getCache(clusterStat.getTag(), medium);
+ if (movesInProgress == null) {
+ return checkCacheEmptyForLong();
+ }
+
+ fromBes.addAll(movesInProgress.get().asMap().values().stream()
+ .filter(p -> p.second != -1L).map(p ->
p.first.fromBe).collect(Collectors.toList()));
+ toBes.addAll(movesInProgress.get().asMap().values().stream()
+ .filter(p -> p.second != -1L).map(p ->
p.first.toBe).collect(Collectors.toList()));
+
+ return checkCacheEmptyForLong();
+ }
+
// cache empty for 10 min
public boolean checkCacheEmptyForLong() {
Review Comment:
remove this function and its field cacheEmptyTimestamp
##########
fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java:
##########
@@ -128,14 +129,17 @@ protected List<TabletSchedCtx>
selectAlternativeTabletsForCluster(
List<BackendLoadStatistic> highBEs = Lists.newArrayList();
clusterStat.getBackendStatisticByClass(lowBEs, midBEs, highBEs,
medium);
- if (Config.tablet_rebalancer_type.equalsIgnoreCase("partition")) {
- PartitionRebalancer rebalancer = (PartitionRebalancer)
Env.getCurrentEnv()
- .getTabletScheduler().getRebalancer();
- if (rebalancer != null && rebalancer.checkCacheEmptyForLong()) {
- midBEs.addAll(lowBEs);
- midBEs.addAll(highBEs);
- }
- } else if (!(lowBEs.isEmpty() && highBEs.isEmpty())) {
+ Rebalancer rebalancer =
Env.getCurrentEnv().getTabletScheduler().getRebalancer();
+ List<Long> fromBes = new ArrayList<>();
+ List<Long> toBes = new ArrayList<>();
+ if (rebalancer != null && rebalancer.isNeedBalanced(clusterStat,
medium, fromBes, toBes)) {
Review Comment:
Check whether the rebalancer has gone a long time without picking tablets
for (tag, medium); if so, move all low BEs and high BEs into mid BEs.
##########
fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java:
##########
@@ -77,6 +86,31 @@ public List<TabletSchedCtx> selectAlternativeTablets() {
protected abstract List<TabletSchedCtx> selectAlternativeTabletsForCluster(
LoadStatisticForTag clusterStat, TStorageMedium medium);
+ protected boolean isNeedBalanced(LoadStatisticForTag clusterStat,
TStorageMedium medium,
+ List<Long> fromBes, List<Long> toBes) {
+ return true;
+ }
+
+ protected boolean unPickOverLongTime(LoadStatisticForTag clusterStat,
TStorageMedium medium) {
+ if (lastPickTimeTable.containsRow(clusterStat)) {
+ Map<TStorageMedium, Long> mediumLastPickTimeMap =
lastPickTimeTable.row(clusterStat);
+ if (mediumLastPickTimeMap.containsKey(medium)) {
+ Long lastPickTimeTable = mediumLastPickTimeMap.get(medium);
+ return lastPickTimeTable > 0 && System.currentTimeMillis() >
lastPickTimeTable + 10 * 60 * 1000L;
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+
+ protected void removeFromLastPickTimeTable(Tag tag, TStorageMedium medium)
{
Review Comment:
No need to consider removing entries from pickTimeTable. Its size is very small.
##########
fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java:
##########
@@ -77,6 +77,11 @@ public List<TabletSchedCtx> selectAlternativeTablets() {
protected abstract List<TabletSchedCtx> selectAlternativeTabletsForCluster(
Review Comment:
Record the time of selectAlternativeTablets(tag, medium) whenever the tablets
it returns are not empty, so that we know how long it has gone without picking
tablets. If it has not picked any for a long time, the disk rebalancer should
always try to schedule storage migration with all backends.
##########
fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java:
##########
@@ -1472,6 +1475,7 @@ private void finalizeTabletCtx(TabletSchedCtx tabletCtx,
TabletSchedCtx.State st
if (state == TabletSchedCtx.State.FINISHED) {
tryAddAfterFinished(tabletCtx);
}
+ rebalancer.removeFromLastPickTimeTable(tabletCtx.getTag(),
tabletCtx.getStorageMedium());
Review Comment:
remove this
##########
fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java:
##########
@@ -55,6 +57,9 @@ public abstract class Rebalancer {
// be id -> end time of prio
protected Map<Long, Long> prioBackends = Maps.newConcurrentMap();
+ // tag -> (medium, timestamp)
+ private Table<LoadStatisticForTag, TStorageMedium, Long> lastPickTimeTable
= HashBasedTable.create();
Review Comment:
use Table<Tag, ...>
##########
fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java:
##########
@@ -77,6 +86,31 @@ public List<TabletSchedCtx> selectAlternativeTablets() {
protected abstract List<TabletSchedCtx> selectAlternativeTabletsForCluster(
LoadStatisticForTag clusterStat, TStorageMedium medium);
+ protected boolean isNeedBalanced(LoadStatisticForTag clusterStat,
TStorageMedium medium,
+ List<Long> fromBes, List<Long> toBes) {
+ return true;
+ }
+
+ protected boolean unPickOverLongTime(LoadStatisticForTag clusterStat,
TStorageMedium medium) {
+ if (lastPickTimeTable.containsRow(clusterStat)) {
+ Map<TStorageMedium, Long> mediumLastPickTimeMap =
lastPickTimeTable.row(clusterStat);
+ if (mediumLastPickTimeMap.containsKey(medium)) {
+ Long lastPickTimeTable = mediumLastPickTimeMap.get(medium);
Review Comment:
Long lastPickTime = lastPickTimeTable.get(tag, medium)
return lastPickTime == null || now - lastPickTime >= 5min
##########
fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java:
##########
@@ -77,6 +86,31 @@ public List<TabletSchedCtx> selectAlternativeTablets() {
protected abstract List<TabletSchedCtx> selectAlternativeTabletsForCluster(
LoadStatisticForTag clusterStat, TStorageMedium medium);
+ protected boolean isNeedBalanced(LoadStatisticForTag clusterStat,
TStorageMedium medium,
Review Comment:
This function is not needed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]