This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d7c873314e8 [fix](balance) Fix PartitionRebalancer generating invalid
moves to BEs without required storage medium (#62206) (#63755)
d7c873314e8 is described below
commit d7c873314e8f387bae5f7d645c478accc0146a4c
Author: deardeng <[email protected]>
AuthorDate: Thu May 28 10:36:55 2026 +0800
[fix](balance) Fix PartitionRebalancer generating invalid moves to BEs
without required storage medium (#62206) (#63755)
pick from https://github.com/apache/doris/pull/62206
---
docker/runtime/doris-compose/command.py | 15 +-
.../apache/doris/catalog/TabletInvertedIndex.java | 13 +-
.../apache/doris/clone/LoadStatisticForTag.java | 13 +-
.../apache/doris/clone/PartitionRebalancer.java | 2 +-
.../java/org/apache/doris/clone/RebalanceTest.java | 81 ++++++++++
.../org/apache/doris/clone/RebalancerTestUtil.java | 16 ++
.../doris/regression/suite/SuiteCluster.groovy | 31 ++--
...est_partition_rebalancer_medium_mismatch.groovy | 178 +++++++++++++++++++++
8 files changed, 329 insertions(+), 20 deletions(-)
diff --git a/docker/runtime/doris-compose/command.py
b/docker/runtime/doris-compose/command.py
index c0abb4a257e..db8b403b4a0 100644
--- a/docker/runtime/doris-compose/command.py
+++ b/docker/runtime/doris-compose/command.py
@@ -391,7 +391,7 @@ class UpCommand(Command):
)
group1.add_argument("--be-disks",
nargs="*",
- default=["HDD=1"],
+ default=None,
type=str,
help="Specify each be disks, each group is
\"disk_type=disk_num[,disk_capactity]\", "\
"disk_type is HDD or SSD, disk_capactity
is capactity limit in gb. default: HDD=1. "\
@@ -618,7 +618,8 @@ class UpCommand(Command):
args.NAME, args.IMAGE, args.cloud, args.root, args.fe_config,
args.be_config, args.ms_config, args.recycle_config,
args.remote_master_fe, args.local_network_ip, args.fe_follower,
- args.be_disks, args.be_cluster, args.reg_be, args.extra_hosts,
+ args.be_disks if args.be_disks is not None else ["HDD=1"],
+ args.be_cluster, args.reg_be, args.extra_hosts,
args.coverage_dir, cloud_store_config, args.sql_mode_node_mgr,
args.be_metaservice_endpoint, args.be_cluster_id, args.tde_ak,
args.tde_sk)
LOG.info("Create new cluster {} succ, cluster path is {}".format(
@@ -659,9 +660,19 @@ class UpCommand(Command):
related_nodes.append(node)
add_ids.append(node.id)
+ # If --be-disks is explicitly provided for an existing cluster,
+ # temporarily override cluster.be_disks so newly added BEs use
+ # the specified disk config instead of the original cluster config.
+ saved_be_disks = cluster.be_disks
+ if args.be_disks is not None:
+ cluster.be_disks = args.be_disks
+
for node_type, add_num, add_ids in add_type_nums:
do_add_node(node_type, add_num, add_ids)
+ # Restore original be_disks to avoid side effects
+ cluster.be_disks = saved_be_disks
+
if args.IMAGE:
for node in related_nodes:
node.set_image(args.IMAGE)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
index d90a7861662..7a14afc051d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
@@ -1011,7 +1011,9 @@ public class TabletInvertedIndex {
// Only build from available bes, exclude colocate tables
public Map<TStorageMedium, TreeMultimap<Long, PartitionBalanceInfo>>
buildPartitionInfoBySkew(
- List<Long> availableBeIds, Map<Long, Pair<TabletMove, Long>>
movesInProgress) {
+ List<Long> availableBeIds,
+ Map<TStorageMedium, List<Long>> availableBeIdsByMedium,
+ Map<Long, Pair<TabletMove, Long>> movesInProgress) {
Set<Long> dbIds = Sets.newHashSet();
Set<Long> tableIds = Sets.newHashSet();
Set<Long> partitionIds = Sets.newHashSet();
@@ -1072,11 +1074,14 @@ public class TabletInvertedIndex {
Map<Long, Long> countMap = partitionReplicasInfo.get(
tabletMeta.getPartitionId(),
tabletMeta.getIndexId());
if (countMap == null) {
- // If one be doesn't have any replica of one
partition, it should be counted too.
- countMap =
availableBeIds.stream().collect(Collectors.toMap(i -> i, i -> 0L));
+ // If one be doesn't have any replica of one partition,
+ // it should be counted too.
+ List<Long> availableBeIdsForMedium =
availableBeIdsByMedium.getOrDefault(
+ medium, Lists.newArrayList());
+ countMap =
availableBeIdsForMedium.stream().collect(Collectors.toMap(i -> i, i -> 0L));
}
- Long count = countMap.get(beId);
+ Long count = countMap.getOrDefault(beId, 0L);
countMap.put(beId, count + 1L);
partitionReplicasInfo.put(tabletMeta.getPartitionId(),
tabletMeta.getIndexId(), countMap);
partitionReplicasInfoMaps.put(medium,
partitionReplicasInfo);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
index 60a0d147917..e731d0701e0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
@@ -159,7 +159,8 @@ public class LoadStatisticForTag {
// Only count the available be
for (TStorageMedium medium : TStorageMedium.values()) {
TreeMultimap<Long, Long> beByTotalReplicaCount =
TreeMultimap.create();
-
beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable).forEach(beStat
->
+ beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable)
+ .filter(beStat -> beStat.hasMedium(medium)).forEach(beStat
->
beByTotalReplicaCount.put(beStat.getReplicaNum(medium),
beStat.getBeId()));
beByTotalReplicaCountMaps.put(medium, beByTotalReplicaCount);
}
@@ -173,9 +174,17 @@ public class LoadStatisticForTag {
.filter(BackendLoadStatistic::isAvailable)
.map(BackendLoadStatistic::getBeId)
.collect(Collectors.toList());
+ Map<TStorageMedium, List<Long>> availableBeIdsByMedium =
Maps.newHashMap();
+ for (TStorageMedium medium : TStorageMedium.values()) {
+ availableBeIdsByMedium.put(medium, beLoadStatistics.stream()
+ .filter(BackendLoadStatistic::isAvailable)
+ .filter(be -> be.hasMedium(medium))
+ .map(BackendLoadStatistic::getBeId)
+ .collect(Collectors.toList()));
+ }
Map<Long, Pair<TabletMove, Long>> movesInProgress = rebalancer ==
null ? Maps.newHashMap()
: ((PartitionRebalancer) rebalancer).getMovesInProgress();
- skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds,
movesInProgress);
+ skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds,
availableBeIdsByMedium, movesInProgress);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
index 30a7a76b920..96db70cf76f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
@@ -305,7 +305,7 @@ public class PartitionRebalancer extends Rebalancer {
List<RootPathLoadStatistic> paths = beStat.getPathStatistics();
List<Long> availPath = paths.stream().filter(path ->
path.getStorageMedium() == tabletCtx.getStorageMedium()
- && path.isFit(tabletCtx.getTabletSize(), false) ==
BalanceStatus.OK)
+ && path.isFit(tabletCtx.getTabletSize(),
false).ok())
.map(RootPathLoadStatistic::getPathHash).collect(Collectors.toList());
long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath,
tabletCtx.getTag(),
tabletCtx.getStorageMedium());
diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
index fc3bbb28485..e50f25d3ae4 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
@@ -322,6 +322,87 @@ public class RebalanceTest {
Assert.assertEquals(needCheckTablets.size(), succeeded.get());
}
+ // Test for OPENSOURCE-192: PartitionRebalancer should not generate moves
+ // targeting a BE that lacks the required storage medium.
+ // Scenario: SSD tablets on BE 20001/20002, new BE 20003 has only HDD.
+ // Without the fix, the algorithm would pick BE 20003 (0 SSD replicas) as
the
+ // "least loaded" destination for SSD tablets, causing infinite scheduling
failures.
+ @Test
+ public void testPartitionRebalancerSkipBEWithoutMedium() {
+ Configurator.setLevel("org.apache.doris.clone.PartitionRebalancer",
Level.DEBUG);
+
+ // Add backends: 20001, 20002 have SSD; 20003 has only HDD
+ systemInfoService.addBackend(
+ RebalancerTestUtil.createBackend(20001L, 2048, 0,
TStorageMedium.SSD));
+ systemInfoService.addBackend(
+ RebalancerTestUtil.createBackend(20002L, 2048, 0,
TStorageMedium.SSD));
+ systemInfoService.addBackend(
+ RebalancerTestUtil.createBackend(20003L, 2048, 0,
TStorageMedium.HDD));
+
+ // Create a table with SSD partition
+ OlapTable ssdTable = new OlapTable(3, "ssd table", new ArrayList<>(),
+ KeysType.DUP_KEYS, new RangePartitionInfo(), new
HashDistributionInfo());
+ db.registerTable(ssdTable);
+
+ MaterializedIndex ssdIndex = new MaterializedIndex(ssdTable.getId(),
null);
+ long partId = 41;
+ Partition partition = new Partition(partId, "p0", ssdIndex, new
HashDistributionInfo());
+ ssdTable.addPartition(partition);
+ ssdTable.getPartitionInfo().addPartition(partId, new
DataProperty(TStorageMedium.SSD),
+ ReplicaAllocation.DEFAULT_ALLOCATION, false, true);
+ ssdTable.setIndexMeta(ssdIndex.getId(), "ssd index",
Lists.newArrayList(new Column()),
+ 0, 0, (short) 0, TStorageType.COLUMN, KeysType.DUP_KEYS);
+
+ // Create SSD tablets: 3 replicas on BE 20001, 1 on BE 20002
+ // This creates skew = 3 - 1 = 2 among SSD BEs (with fix),
+ // or skew = 3 - 0 = 3 counting HDD-only BEs (without fix)
+ RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0",
TStorageMedium.SSD,
+ 80001, Lists.newArrayList(20001L));
+ RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0",
TStorageMedium.SSD,
+ 80002, Lists.newArrayList(20001L));
+ RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0",
TStorageMedium.SSD,
+ 80003, Lists.newArrayList(20001L));
+ RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0",
TStorageMedium.SSD,
+ 80004, Lists.newArrayList(20002L));
+
+ // Regenerate statistics with partition rebalancer
+ Config.tablet_rebalancer_type = "partition";
+ LoadStatisticForTag loadStatistic = new LoadStatisticForTag(
+ Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex,
null);
+ loadStatistic.init();
+ Map<Tag, LoadStatisticForTag> ssdStatMap = Maps.newHashMap();
+ ssdStatMap.put(Tag.DEFAULT_BACKEND_TAG, loadStatistic);
+
+ PartitionRebalancer rebalancer = new
PartitionRebalancer(Env.getCurrentSystemInfo(),
+ Env.getCurrentInvertedIndex(), null);
+ rebalancer.updateLoadStatistic(ssdStatMap);
+ rebalancer.selectAlternativeTablets();
+
+ // Verify: moves were generated (test is meaningful)
+ Map<Long, Pair<PartitionRebalancer.TabletMove, Long>> moves =
rebalancer.getMovesInProgress();
+ Assert.assertFalse("Should generate moves for skewed SSD partition",
moves.isEmpty());
+
+ // Verify: no move targets BE 20003 (HDD-only) or any of the HDD BEs
from setUp (10001-10004)
+ for (Map.Entry<Long, Pair<PartitionRebalancer.TabletMove, Long>> entry
: moves.entrySet()) {
+ PartitionRebalancer.TabletMove move = entry.getValue().first;
+ Assert.assertNotEquals("Move should not target HDD-only BE for SSD
tablet",
+ Long.valueOf(20003L), move.toBe);
+ Assert.assertFalse("Move should not target any BE without SSD",
+ move.toBe == 10001L || move.toBe == 10002L
+ || move.toBe == 10003L || move.toBe == 10004L);
+ }
+
+ // Verify: all moves go from BE 20001 (most loaded) to BE 20002 (least
loaded with SSD)
+ for (Map.Entry<Long, Pair<PartitionRebalancer.TabletMove, Long>> entry
: moves.entrySet()) {
+ PartitionRebalancer.TabletMove move = entry.getValue().first;
+ Assert.assertEquals("Source should be the most loaded SSD BE",
+ Long.valueOf(20001L), move.fromBe);
+ Assert.assertEquals("Dest should be the least loaded SSD BE",
+ Long.valueOf(20002L), move.toBe);
+ }
+ LOG.info("testPartitionRebalancerSkipBEWithoutMedium success");
+ }
+
@Test
public void testMoveInProgressMap() {
Configurator.setLevel("org.apache.doris.clone.MovesInProgressCache",
Level.DEBUG);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
index 1e6af5c7324..58c021370c0 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
@@ -49,6 +49,22 @@ public class RebalancerTestUtil {
return createBackend(id, totalCap, Lists.newArrayList(usedCap), 1);
}
+ // Create an alive backend with a single disk ("/path1") of the given storage medium; path hash = id.
+ public static Backend createBackend(long id, long totalCap, long usedCap,
TStorageMedium medium) {
+ Backend be = new Backend(id, "192.168.0." + id, 9051);
+ Map<String, DiskInfo> disks = Maps.newHashMap();
+ DiskInfo diskInfo = new DiskInfo("/path1");
+ diskInfo.setPathHash(id);
+ diskInfo.setTotalCapacityB(totalCap);
+ diskInfo.setDataUsedCapacityB(usedCap);
+ diskInfo.setAvailableCapacityB(totalCap - usedCap);
+ diskInfo.setStorageMedium(medium);
+ disks.put(diskInfo.getRootPath(), diskInfo);
+ be.setDisks(ImmutableMap.copyOf(disks));
+ be.setAlive(true);
+ return be;
+ }
+
/**
* size of usedCaps should equal to diskNum.
*/
diff --git
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
index 16b18c00cfa..513c0a5b3bd 100644
---
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
+++
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
@@ -517,10 +517,15 @@ class SuiteCluster {
}
List<Integer> addFrontend(int num, boolean followerMode=false) throws
Exception {
- def result = add(num, 0, null, followerMode)
+ def result = add(num, 0, null, followerMode, null) // feNum goes first; no beDisks
return result.first
}
+ List<Integer> addBackend(int num, List<String> beDisks) throws Exception {
+ def result = add(0, num, '', false, beDisks)
+ return result.second
+ }
+
List<Integer> addBackend(int num, String ClusterName='') throws Exception {
def result = add(0, num, ClusterName)
return result.second
@@ -528,29 +533,33 @@ class SuiteCluster {
// ATTN: clusterName just used for cloud mode, 1 cluster has n bes
// ATTN: followerMode just used for cloud mode
- Tuple2<List<Integer>, List<Integer>> add(int feNum, int beNum, String
clusterName, boolean followerMode=false) throws Exception {
+ // ATTN: beDisks is only used in non-cloud mode
+ Tuple2<List<Integer>, List<Integer>> add(int feNum, int beNum, String
clusterName, boolean followerMode=false, List<String> beDisks=null) throws
Exception {
assert feNum > 0 || beNum > 0
- def sb = new StringBuilder()
- sb.append('up ' + name + ' ')
+ def cmd = ['up', name]
if (feNum > 0) {
- sb.append('--add-fe-num ' + feNum + ' ')
+ cmd += ['--add-fe-num', String.valueOf(feNum)]
if (followerMode) {
- sb.append('--fe-follower' + ' ')
+ cmd += ['--fe-follower']
}
if (sqlModeNodeMgr) {
- sb.append('--sql-mode-node-mgr' + ' ')
+ cmd += ['--sql-mode-node-mgr']
}
}
if (beNum > 0) {
- sb.append('--add-be-num ' + beNum + ' ')
+ cmd += ['--add-be-num', String.valueOf(beNum)]
if (clusterName != null && !clusterName.isEmpty()) {
- sb.append(' --be-cluster ' + clusterName + ' ')
+ cmd += ['--be-cluster', clusterName]
}
}
- sb.append('--wait-timeout 60')
+ if (beDisks != null && !beDisks.isEmpty()) {
+ cmd += ['--be-disks']
+ cmd += beDisks
+ }
+ cmd += ['--wait-timeout', '60']
- def data = (Map<String, Map<String, Object>>) runCmd(sb.toString(),
180)
+ def data = (Map<String, Map<String, Object>>) runCmdList(cmd, 180)
def newFrontends = (List<Integer>) data.get('fe').get('add_list')
def newBackends = (List<Integer>) data.get('be').get('add_list')
diff --git
a/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
new file mode 100644
index 00000000000..5aceb148914
--- /dev/null
+++
b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+
+/**
+ * Reproduce OPENSOURCE-192:
+ * When tablet_rebalancer_type=Partition, adding a new BE with only HDD disks
+ * to a cluster where tables are created with storage_medium=SSD causes
+ * the PartitionRebalancer to generate invalid moves (SSD tablets -> HDD-only
BE),
+ * resulting in infinite "paths has no available balance slot: []" errors.
+ *
+ * Root cause: In LoadStatisticForTag.init(), the beByTotalReplicaCount map
+ * for each medium includes ALL available BEs without checking hasMedium().
+ * Similarly, TabletInvertedIndex.buildPartitionInfoBySkew() includes all
+ * availableBeIds in countMap without medium filtering. This causes the
+ * greedy algorithm to generate moves targeting BEs that lack the required
+ * storage medium.
+ *
+ * Setup:
+ * - 3 initial BEs with SSD + HDD disks
+ * - Table created with storage_medium = SSD (explicitly specified)
+ * - Add 1 new BE with HDD only (via addBackend with custom beDisks)
+ * - PartitionRebalancer generates invalid moves to the HDD-only BE
+ */
+suite('test_partition_rebalancer_medium_mismatch', 'docker') {
+ if (isCloudMode()) {
+ return
+ }
+
+ def options = new ClusterOptions()
+ options.feConfigs += [
+ 'tablet_rebalancer_type=Partition',
+ 'schedule_slot_num_per_hdd_path=8',
+ 'balance_slot_num_per_path=2',
+ 'disable_balance=false',
+ 'disable_disk_balance=true',
+ 'tablet_checker_interval_ms=2000',
+ 'schedule_batch_size=1000',
+ ]
+ options.beConfigs += [
+ 'report_disk_state_interval_seconds=2',
+ 'report_tablet_interval_seconds=3',
+ ]
+ // Initial 3 BEs: each has 1 SSD + 1 HDD
+ options.beDisks = ['SSD=1', 'HDD=1']
+ options.beNum = 3
+
+ docker(options) {
+ // Step 1: Create table explicitly with SSD medium
+ def table = 'tbl_ssd_balance'
+ sql "DROP TABLE IF EXISTS ${table} FORCE"
+ sql """
+ CREATE TABLE ${table} (
+ k1 INT,
+ k2 VARCHAR(100),
+ v1 INT
+ )
+ DISTRIBUTED BY HASH(k1) BUCKETS 10
+ PROPERTIES (
+ 'replication_num' = '1',
+ 'storage_medium' = 'SSD'
+ )
+ """
+
+ // Verify partition medium is SSD
+ def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${table}"
+ assertTrue(partitions.size() > 0)
+ partitions.each {
+ assertEquals('SSD', it.StorageMedium)
+ }
+ log.info("Table created with SSD medium, partitions:
${partitions.size()}")
+
+ // Step 2: Insert data to distribute tablets across existing BEs
+ for (int i = 0; i < 100; i++) {
+ sql "INSERT INTO ${table} VALUES (${i}, 'value_${i}', ${i * 10})"
+ }
+
+ def count = sql "SELECT COUNT(*) FROM ${table}"
+ assertEquals(100, count[0][0] as int)
+
+ // Record tablet distribution before expansion
+ def tabletsBefore = sql_return_maparray "SHOW TABLETS FROM ${table}"
+ log.info("Tablets before expansion: ${tabletsBefore.size()}")
+ def beIdsBefore = tabletsBefore.collect { it.BackendId }.unique()
+ log.info("Tablets on BEs: ${beIdsBefore}")
+
+ // Let scheduler settle
+ sleep(10000)
+
+ // Step 3: Add a new BE with HDD only (different disk config from
initial BEs)
+ log.info("Adding new BE with HDD-only disks...")
+ def newBeIndices = cluster.addBackend(1, ['HDD=1'])
+ log.info("New BE added with indices: ${newBeIndices}")
+
+ // Wait for new BE heartbeat and disk report
+ sleep(8000)
+
+ // Verify all backends
+ def backends = sql_return_maparray "SHOW BACKENDS"
+ log.info("Total backends after expansion: ${backends.size()}")
+ assertEquals(4, backends.size())
+
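+ // Find the new BE. NOTE: beIdsBefore only lists BEs that held tablets of this table, so this assumes every initial BE held at least one tablet.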
+ def newBeId = null
+ for (def be : backends) {
+ if (!(be.BackendId in beIdsBefore.collect { it as String })) {
+ newBeId = be.BackendId
+ break
+ }
+ }
+ assertNotNull(newBeId, "Should find new BE")
+ log.info("New BE id: ${newBeId}")
+
+ // Verify new BE has only HDD
+ def newBeDisks = sql_return_maparray "SHOW PROC '/backends/${newBeId}'"
+ log.info("New BE disks: ${newBeDisks}")
+ def hasSSD = newBeDisks.any { it.StorageMedium == 'SSD' }
+ def hasHDD = newBeDisks.any { it.StorageMedium == 'HDD' }
+ assertTrue(hasHDD, "New BE should have HDD disk")
+ assertFalse(hasSSD, "New BE should NOT have SSD disk")
+
+ // Step 4: Wait for PartitionRebalancer to attempt balance scheduling
+ // The bug: algorithm generates moves targeting the HDD-only BE for
SSD tablets
+ log.info("Waiting for PartitionRebalancer to run (60s)...")
+ sleep(60000)
+
+ // Step 5: Check balance history for the bug signature
+ def schedHistory = sql_return_maparray "SHOW PROC
'/cluster_balance/history_tablets'"
+ def failedWithEmptySlot = schedHistory.findAll {
+ it.ErrMsg != null && it.ErrMsg.contains('paths has no available
balance slot: []')
+ }
+
+ log.info("Total history entries: ${schedHistory.size()}")
+ log.info("Entries with 'empty slot' error:
${failedWithEmptySlot.size()}")
+
+ if (failedWithEmptySlot.size() > 0) {
+ log.warn("BUG REPRODUCED (OPENSOURCE-192)! " +
+ "Found ${failedWithEmptySlot.size()} balance tasks " +
+ "failed with 'paths has no available balance slot: []'")
+ failedWithEmptySlot.take(5).each { task ->
+ log.warn(" tablet=${task.TabletId}, dest=${task.DestBe},
err=${task.ErrMsg}")
+ }
+ // This assertion will fail when the bug is present, and pass
after fix
+ fail("BUG: PartitionRebalancer generated invalid moves to HDD-only
BE for SSD tablets")
+ } else {
+ log.info("No 'empty slot' failures. Bug not triggered or already
fixed.")
+ }
+
+ // Step 6: Check that no tablets moved to the new BE
+ // (since it has no SSD, SSD tablets should NOT be relocated there)
+ def tabletsAfter = sql_return_maparray "SHOW TABLETS FROM ${table}"
+ def tabletsOnNewBe = tabletsAfter.findAll { it.BackendId == newBeId }
+ log.info("Tablets on new HDD-only BE: ${tabletsOnNewBe.size()}")
+ assertEquals(0, tabletsOnNewBe.size())
+
+ // Step 7: Verify data integrity
+ def countAfter = sql "SELECT COUNT(*) FROM ${table}"
+ assertEquals(100, countAfter[0][0] as int)
+
+ // Cleanup
+ sql "DROP TABLE IF EXISTS ${table} FORCE"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]