This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-5713 by this push:
new 27dcc200399 HDDS-14114. [DiskBalancer] Fix DiskBalancer tmp directory
to be under hdds/<cluster-id>/tmp (#9515)
27dcc200399 is described below
commit 27dcc200399480855dca3d38f48a2d1f01c9161e
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Tue Jan 6 17:47:33 2026 +0530
HDDS-14114. [DiskBalancer] Fix DiskBalancer tmp directory to be under
hdds/<cluster-id>/tmp (#9515)
---
.../diskbalancer/DiskBalancerService.java | 66 +++++++--
.../diskbalancer/TestDiskBalancerService.java | 154 +++++++++++++++++++++
2 files changed, 205 insertions(+), 15 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
index 957f187491f..aaa14321011 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.container.diskbalancer;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS;
+import static
org.apache.hadoop.ozone.container.common.volume.StorageVolume.TMP_DIR_NAME;
import static
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.calculateVolumeDataDensity;
import static
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.getIdealUsage;
import static
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.getVolumeUsages;
@@ -66,7 +67,6 @@
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
-import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
import
org.apache.hadoop.ozone.container.common.volume.VolumeChoosingPolicyFactory;
import
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.VolumeFixedUsage;
import
org.apache.hadoop.ozone.container.diskbalancer.policy.ContainerChoosingPolicy;
@@ -165,8 +165,6 @@ public DiskBalancerService(OzoneContainer ozoneContainer,
metrics = DiskBalancerServiceMetrics.create();
loadDiskBalancerInfo();
-
- constructTmpDir();
}
/**
@@ -178,17 +176,45 @@ public synchronized void refresh(DiskBalancerInfo
diskBalancerInfo) throws IOExc
applyDiskBalancerInfo(diskBalancerInfo);
}
- private void constructTmpDir() throws IOException {
+ /**
+ * Cleans up stale diskBalancer temporary directories on startup.
+ *
+ * @throws IOException if cleanup fails
+ */
+ private void cleanupTmpDir() throws IOException {
for (HddsVolume volume:
StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())) {
- Path tmpDir = getDiskBalancerTmpDir(volume);
+ Path diskBalancerTmpDir = null;
try {
- FileUtils.deleteDirectory(tmpDir.toFile());
- FileUtils.forceMkdir(tmpDir.toFile());
+ File tmpDir = volume.getTmpDir();
+ if (tmpDir != null) {
+ // If tmpDir is initialized, use it directly
+ diskBalancerTmpDir = tmpDir.toPath().resolve(DISK_BALANCER_DIR);
+ } else {
+ // If tmpDir is not initialized, construct the path manually
+ // This handles the case where stale directories exist from previous
+ // failed moves even though volumes haven't been initialized yet
+ String clusterId = volume.getClusterID();
+ if (clusterId == null) {
+ // Skip volumes without clusterID - they're not properly formatted
+ continue;
+ }
+ String workDirName = volume.getWorkingDirName();
+ if (workDirName == null) {
+ workDirName = clusterId;
+ }
+ diskBalancerTmpDir = Paths.get(volume.getStorageDir().toString(),
+ workDirName, TMP_DIR_NAME, DISK_BALANCER_DIR);
+ }
+
+ // Clean up any existing diskBalancer directory from previous runs
+ if (diskBalancerTmpDir.toFile().exists()) {
+ FileUtils.deleteDirectory(diskBalancerTmpDir.toFile());
+ LOG.info("Cleaned up stale diskBalancer tmp directory: {}",
diskBalancerTmpDir);
+ }
} catch (IOException ex) {
- LOG.warn("Can not reconstruct tmp directory under volume {}", volume,
- ex);
- throw ex;
+ LOG.warn("Failed to clean up diskBalancer tmp directory under volume
{}: {}",
+ volume, diskBalancerTmpDir, ex);
}
}
}
@@ -341,6 +367,17 @@ public void setVersion(DiskBalancerVersion version) {
this.version = version;
}
+ @Override
+ public synchronized void start() {
+ // Clean up any stale diskBalancer tmp directories from previous runs
+ try {
+ cleanupTmpDir();
+ } catch (IOException e) {
+ LOG.warn("Failed to clean up diskBalancer tmp directories before
starting service", e);
+ }
+ super.start();
+ }
+
@Override
public BackgroundTaskQueue getTasks() {
BackgroundTaskQueue queue = new BackgroundTaskQueue();
@@ -477,8 +514,8 @@ public BackgroundTaskResult call() {
container.readLock();
try {
// Step 1: Copy container to new Volume's tmp Dir
- diskBalancerTmpDir = destVolume.getTmpDir().toPath()
- .resolve(DISK_BALANCER_DIR).resolve(String.valueOf(containerId));
+ diskBalancerTmpDir = getDiskBalancerTmpDir(destVolume)
+ .resolve(String.valueOf(containerId));
ozoneContainer.getController().copyContainer(containerData,
diskBalancerTmpDir);
// Step 2: verify checksum and Transition Temp container to Temp
C1-RECOVERING
@@ -697,9 +734,8 @@ public long calculateBytesToMove(List<VolumeFixedUsage>
inputVolumeSet) {
return totalBytesToMove;
}
- private Path getDiskBalancerTmpDir(HddsVolume hddsVolume) {
- return Paths.get(hddsVolume.getVolumeRootDir())
- .resolve(StorageVolume.TMP_DIR_NAME).resolve(DISK_BALANCER_DIR);
+ private Path getDiskBalancerTmpDir(HddsVolume hddsVolume) throws IOException
{
+ return hddsVolume.getTmpDir().toPath().resolve(DISK_BALANCER_DIR);
}
public DiskBalancerServiceMetrics getMetrics() {
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
index 012cd3742eb..fc1980e579e 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.ozone.container.diskbalancer;
import static
org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded;
+import static
org.apache.hadoop.ozone.container.common.volume.StorageVolume.TMP_DIR_NAME;
import static
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.getVolumeUsages;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
@@ -52,6 +54,7 @@
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
+import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
import
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.VolumeFixedUsage;
import
org.apache.hadoop.ozone.container.diskbalancer.policy.ContainerChoosingPolicy;
import
org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy;
@@ -114,6 +117,45 @@ public void cleanup() throws IOException {
volumeSet.shutdown();
}
+ /**
+ * Creates stale diskBalancer directories to simulate leftover directories
+ * from previous failed container moves.
+ *
+ * @param volumeSet the volume set containing volumes to create stale dirs
for
+ * @param clusterId the cluster ID to use when constructing paths for
uninitialized volumes
+ * @throws IOException if directory creation fails
+ */
+ private void createStaleDiskBalancerDirs(VolumeSet volSet, String clusterId)
+ throws IOException {
+ List<StorageVolume> volumes = volSet.getVolumesList();
+ for (StorageVolume volume : volumes) {
+ if (volume instanceof HddsVolume) {
+ HddsVolume hddsVolume = (HddsVolume) volume;
+ File staleDiskBalancerDir;
+
+ File volumeTmpDir = hddsVolume.getTmpDir();
+ if (volumeTmpDir != null) {
+ // If tmpDir is initialized, use it directly
+ staleDiskBalancerDir = new File(volumeTmpDir,
DiskBalancerService.DISK_BALANCER_DIR);
+ } else {
+ // If tmpDir is not initialized, construct the path manually
+ File clusterIdDir = new File(hddsVolume.getHddsRootDir(), clusterId);
+ File tmpDirPath = new File(clusterIdDir, TMP_DIR_NAME);
+ staleDiskBalancerDir = new File(tmpDirPath,
DiskBalancerService.DISK_BALANCER_DIR);
+ }
+
+ // Create stale directory with some content
+ assertTrue(staleDiskBalancerDir.mkdirs(),
+ "Failed to create stale diskBalancer directory: " +
staleDiskBalancerDir.getAbsolutePath());
+ File staleContainerDir = new File(staleDiskBalancerDir, "12345");
+ assertTrue(staleContainerDir.mkdirs());
+ // Verify stale directory exists before cleanup
+ assertTrue(staleDiskBalancerDir.exists(),
+ "Stale diskBalancer directory should exist before cleanup");
+ }
+ }
+ }
+
@ContainerTestVersionInfo.ContainerTest
public void testUpdateService(ContainerTestVersionInfo versionInfo) throws
Exception {
setLayoutAndSchemaForTest(versionInfo);
@@ -361,4 +403,116 @@ public void
testDiskBalancerConfigurationThresholdValidation(double threshold,
assertEquals(expectedThreshold, config.getThreshold(), 0.0001);
}
}
+
+ @ContainerTestVersionInfo.ContainerTest
+ public void testDiskBalancerCleansUpStaleTmpDir(ContainerTestVersionInfo
versionInfo) throws Exception {
+ setLayoutAndSchemaForTest(versionInfo);
+ // Start volumes to initialize tmp directories
+ volumeSet.startAllVolume();
+
+ ContainerSet containerSet = ContainerSet.newReadOnlyContainerSet(1000);
+ ContainerMetrics metrics = ContainerMetrics.create(conf);
+ KeyValueHandler keyValueHandler =
+ new KeyValueHandler(conf, datanodeUuid, containerSet, volumeSet,
+ metrics, c -> {
+ }, new ContainerChecksumTreeManager(conf));
+
+ // Create stale diskBalancer directories to simulate leftover from
previous run
+ createStaleDiskBalancerDirs(volumeSet, scmId);
+
+ // Use actual DiskBalancerService (not TestImpl) to test the real start()
method
+ OzoneContainer ozoneContainer = mockDependencies(containerSet,
keyValueHandler, null);
+ DiskBalancerService svc = new DiskBalancerService(ozoneContainer, 1000,
1000,
+ TimeUnit.MILLISECONDS, 1, conf);
+
+ // Start the service, which should clean up stale tmp directories via
cleanupTmpDir()
+ svc.start();
+
+ // Verify stale diskBalancer tmp directories are cleaned up
+ for (StorageVolume volume : volumeSet.getVolumesList()) {
+ if (volume instanceof HddsVolume) {
+ HddsVolume hddsVolume = (HddsVolume) volume;
+ File volumeTmpDir = hddsVolume.getTmpDir();
+ File diskBalancerTmpDir = new File(volumeTmpDir,
DiskBalancerService.DISK_BALANCER_DIR);
+
+ // Verify stale directory is cleaned up (should not exist)
+ assertFalse(diskBalancerTmpDir.exists(),
+ "Stale diskBalancer tmp directory should be cleaned up on
startup");
+ }
+ }
+
+ svc.shutdown();
+ }
+
+ @ContainerTestVersionInfo.ContainerTest
+ public void
testDiskBalancerCleanupWhenTmpDirNotInitialized(ContainerTestVersionInfo
versionInfo) throws Exception {
+ setLayoutAndSchemaForTest(versionInfo);
+ // Create a fresh volume set WITHOUT calling
createDbInstancesForTestIfNeeded
+ // This simulates volumes that are formatted but tmpDir is not initialized
+ MutableVolumeSet testVolumeSet = new MutableVolumeSet(datanodeUuid, scmId,
conf, null,
+ StorageVolume.VolumeType.DATA_VOLUME, null);
+
+ // Format volumes and ensure clusterID directory exists, but DON'T create
tmp dirs
+ // This simulates the scenario where tmpDir is null
+ for (StorageVolume volume : testVolumeSet.getVolumesList()) {
+ if (volume instanceof HddsVolume) {
+ HddsVolume hddsVolume = (HddsVolume) volume;
+ // Format volume to set clusterID
+ hddsVolume.format(scmId);
+ // Manually create the clusterID directory (needed for tmpDir creation)
+ // but don't call createWorkingDir() or createTmpDirs()
+ File clusterIdDir = new File(hddsVolume.getHddsRootDir(), scmId);
+ if (!clusterIdDir.exists()) {
+ assertTrue(clusterIdDir.mkdirs(),
+ "Failed to create clusterID directory: " +
clusterIdDir.getAbsolutePath());
+ }
+ // Verify tmpDir is null (not initialized)
+ assertNull(hddsVolume.getTmpDir());
+ }
+ }
+
+ // Create stale diskBalancer directories manually to simulate leftover
from failed move
+ // This tests the scenario where stale dirs exist even though tmpDir is
not initialized
+ createStaleDiskBalancerDirs(testVolumeSet, scmId);
+
+ ContainerSet containerSet = ContainerSet.newReadOnlyContainerSet(1000);
+ ContainerMetrics metrics = ContainerMetrics.create(conf);
+ KeyValueHandler keyValueHandler =
+ new KeyValueHandler(conf, datanodeUuid, containerSet, testVolumeSet,
+ metrics, c -> {
+ }, new ContainerChecksumTreeManager(conf));
+
+ // Use actual DiskBalancerService (not TestImpl) to test the real start()
method
+ OzoneContainer ozoneContainer = mockDependencies(containerSet,
keyValueHandler, null);
+ // Override getVolumeSet to return our test volume set
+ when(ozoneContainer.getVolumeSet()).thenReturn(testVolumeSet);
+
+ DiskBalancerService svc = new DiskBalancerService(ozoneContainer, 1000,
1000,
+ TimeUnit.MILLISECONDS, 1, conf);
+
+ // Start the service - cleanup should handle volumes with uninitialized
tmpDir
+ // and clean up stale directories even when tmpDir is null
+ svc.start();
+
+ // Verify stale directories are cleaned up even though tmpDir is not
initialized
+ List<StorageVolume> volumes = testVolumeSet.getVolumesList();
+ for (StorageVolume volume : volumes) {
+ if (volume instanceof HddsVolume) {
+ HddsVolume hddsVolume = (HddsVolume) volume;
+ // tmpDir should still be null - cleanup doesn't initialize it
+ assertNull(hddsVolume.getTmpDir(),
+ "tmpDir should not be initialized by cleanup, it will be created
lazily");
+
+ // Verify stale diskBalancer directory is cleaned up
+ File hddsRootDir = hddsVolume.getHddsRootDir();
+ File expectedDiskBalancerTmpDir = new File(new File(hddsRootDir,
scmId),
+ TMP_DIR_NAME + File.separator +
DiskBalancerService.DISK_BALANCER_DIR);
+ assertFalse(expectedDiskBalancerTmpDir.exists(),
+ "Stale diskBalancer directory should be cleaned up even when
tmpDir is not initialized");
+ }
+ }
+
+ svc.shutdown();
+ testVolumeSet.shutdown();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]