This is an automated email from the ASF dual-hosted git repository. erose pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push: new 498a9c14ddc HDDS-13092. Container scanner should trigger volume scan when marking a container unhealthy (#8603) 498a9c14ddc is described below commit 498a9c14ddc69c7f36dc4eb9c87b42443c6a3ca1 Author: Tejaskriya <87555809+tejaskr...@users.noreply.github.com> AuthorDate: Tue Jul 22 03:59:28 2025 +0530 HDDS-13092. Container scanner should trigger volume scan when marking a container unhealthy (#8603) Co-authored-by: Doroszlai, Attila <adorosz...@apache.org> --- .../container/ozoneimpl/ContainerScanHelper.java | 26 ++++++++++++++++++---- .../TestBackgroundContainerDataScanner.java | 15 +++++++++++++ .../TestBackgroundContainerMetadataScanner.java | 17 ++++++++++++++ .../ozoneimpl/TestContainerScannersAbstract.java | 8 +++++++ .../ozoneimpl/TestOnDemandContainerScanner.java | 11 +++++++++ 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index c0e16c7de93..4c4a45c55d4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -26,6 +26,7 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; @@ -75,7 +76,7 @@ public void scanData(Container<?> container, DataTransferThrottler throttler, Ca log.warn("Failed to update container checksum after scan of container 
{}", containerId, ex); } if (result.hasErrors()) { - handleUnhealthyScanResult(containerId, result); + handleUnhealthyScanResult(containerData, result); } metrics.incNumContainersScanned(); } @@ -103,7 +104,7 @@ public void scanMetadata(Container<?> container) return; } if (result.hasErrors()) { - handleUnhealthyScanResult(containerId, result); + handleUnhealthyScanResult(containerData, result); } Instant now = Instant.now(); @@ -114,8 +115,8 @@ public void scanMetadata(Container<?> container) logScanCompleted(containerData, now); } - public void handleUnhealthyScanResult(long containerID, ScanResult result) throws IOException { - + public void handleUnhealthyScanResult(ContainerData containerData, ScanResult result) throws IOException { + long containerID = containerData.getContainerID(); log.error("Corruption detected in container [{}]. Marking it UNHEALTHY. {}", containerID, result); if (log.isDebugEnabled()) { StringBuilder allErrorString = new StringBuilder(); @@ -130,6 +131,23 @@ public void handleUnhealthyScanResult(long containerID, ScanResult result) throw boolean containerMarkedUnhealthy = controller.markContainerUnhealthy(containerID, result); if (containerMarkedUnhealthy) { metrics.incNumUnHealthyContainers(); + // triggering a volume scan for the unhealthy container + triggerVolumeScan(containerData); + } + } + + public void triggerVolumeScan(ContainerData containerData) { + HddsVolume volume = containerData.getVolume(); + if (volume != null && !volume.isFailed()) { + log.info("Triggering scan of volume [{}] with unhealthy container [{}]", + volume, containerData.getContainerID()); + StorageVolumeUtil.onFailure(volume); + } else if (volume == null) { + log.warn("Cannot trigger volume scan for container {} since its volume is null", + containerData.getContainerID()); + } else { + log.debug("Skipping volume scan for container {} since its volume {} has failed.", + containerData.getContainerID(), volume); } } diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index 93ef66680d8..508c472a7c8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -32,6 +32,7 @@ import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -48,9 +49,12 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; import org.mockito.junit.jupiter.MockitoSettings; import org.mockito.quality.Strictness; @@ -133,6 +137,17 @@ public void testUnhealthyContainersDetected() throws Exception { verifyContainerMarkedUnhealthy(deletedContainer, never()); } + @Test + @Override + public void testUnhealthyContainersTriggersVolumeScan() throws Exception { + when(controller.markContainerUnhealthy(anyLong(), any(ScanResult.class))).thenReturn(true); + try (MockedStatic<StorageVolumeUtil> mockedStatic = mockStatic(StorageVolumeUtil.class)) { + scanner.runIteration(); + verifyContainerMarkedUnhealthy(corruptData, 
atLeastOnce()); + mockedStatic.verify(() -> StorageVolumeUtil.onFailure(corruptData.getContainerData().getVolume()), times(1)); + } + } + @Test public void testScanTimestampUpdated() throws Exception { scanner.runIteration(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java index abc3126f762..9b6c6aed3f0 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java @@ -32,7 +32,9 @@ import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -45,9 +47,12 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; import org.mockito.junit.jupiter.MockitoSettings; import org.mockito.quality.Strictness; @@ -130,6 +135,18 @@ public void testUnhealthyContainersDetected() throws Exception { verifyContainerMarkedUnhealthy(openContainer, never()); } + @Test + @Override + public void 
testUnhealthyContainersTriggersVolumeScan() throws Exception { + when(controller.markContainerUnhealthy(anyLong(), any(ScanResult.class))).thenReturn(true); + try (MockedStatic<StorageVolumeUtil> mockedStatic = mockStatic(StorageVolumeUtil.class)) { + scanner.runIteration(); + verifyContainerMarkedUnhealthy(openCorruptMetadata, atLeastOnce()); + mockedStatic.verify(() -> + StorageVolumeUtil.onFailure(openCorruptMetadata.getContainerData().getVolume()), times(1)); + } + } + @Test @Override public void testUnhealthyContainerRescanned() throws Exception { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java index f3537480970..7bd45c3b503 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java @@ -31,6 +31,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.io.File; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.ArrayList; @@ -130,6 +131,9 @@ public abstract void testPreviouslyScannedContainerIsScanned() @Test public abstract void testChecksumUpdateFailure() throws Exception; + @Test + public abstract void testUnhealthyContainersTriggersVolumeScan() throws Exception; + // HELPER METHODS protected void setScannedTimestampOld(Container<ContainerData> container) { @@ -198,6 +202,10 @@ private ContainerController mockContainerController() { MetadataScanResult healthyMetadata = getHealthyMetadataScanResult(); MetadataScanResult unhealthyMetadata = getUnhealthyMetadataScanResult(); + File volLocation = mock(File.class); + 
when(volLocation.getPath()).thenReturn("/temp/volume-testcontainerscanner"); + when(vol.getStorageDir()).thenReturn(volLocation); + // healthy container ContainerTestUtils.setupMockContainer(healthy, true, healthyMetadata, healthyData, diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java index 98a65ca761d..8bd8f2060ba 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java @@ -53,6 +53,7 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -347,6 +348,16 @@ public void testMerkleTreeWritten() throws Exception { } } + @Test + @Override + public void testUnhealthyContainersTriggersVolumeScan() throws Exception { + when(controller.markContainerUnhealthy(anyLong(), any(ScanResult.class))).thenReturn(true); + LogCapturer logCapturer = LogCapturer.captureLogs(OnDemandContainerScanner.class); + scanContainer(corruptData); + verifyContainerMarkedUnhealthy(corruptData, times(1)); + assertTrue(logCapturer.getOutput().contains("Triggering scan of volume")); + } + private void scanContainer(Container<?> container) throws Exception { Optional<Future<?>> scanFuture = onDemandScanner.scanContainer(container); if (scanFuture.isPresent()) { --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@ozone.apache.org For additional commands, e-mail: commits-help@ozone.apache.org