This is an automated email from the ASF dual-hosted git repository.
sumitagrawal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new f4262d7768 HDDS-7365. Integrate container and volume scanners. (#4849)
f4262d7768 is described below
commit f4262d7768a03bcd8b731e30f0d56e29e765929c
Author: Ethan Rose <[email protected]>
AuthorDate: Wed Jun 14 01:27:13 2023 -0700
HDDS-7365. Integrate container and volume scanners. (#4849)
---
.../container/common/impl/HddsDispatcher.java | 1 +
.../container/common/volume/StorageVolume.java | 2 +-
.../ozone/container/keyvalue/KeyValueHandler.java | 37 ++++++++++------
.../ozoneimpl/BackgroundContainerDataScanner.java | 17 +++++++-
.../BackgroundContainerMetadataScanner.java | 17 +++++++-
.../ozoneimpl/OnDemandContainerDataScanner.java | 23 +++++++---
.../ozone/container/common/ContainerTestUtils.java | 11 ++---
.../TestKeyValueHandlerWithUnhealthyContainer.java | 50 ++++++++++++++++++----
.../TestBackgroundContainerDataScanner.java | 32 ++++++++++++++
.../TestBackgroundContainerMetadataScanner.java | 41 ++++++++++++++++++
.../ozoneimpl/TestContainerScannersAbstract.java | 11 +++--
.../TestOnDemandContainerDataScanner.java | 27 ++++++++++++
12 files changed, 224 insertions(+), 45 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
index 3a11dccf47..89c39367bd 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
@@ -359,6 +359,7 @@ public class HddsDispatcher implements ContainerDispatcher,
Auditor {
|| containerState == State.RECOVERING);
// mark and persist the container state to be unhealthy
try {
+ // TODO HDDS-7096: Use on demand scanning here instead.
handler.markContainerUnhealthy(container);
LOG.info("Marked Container UNHEALTHY, ContainerID: {}", containerID);
} catch (IOException ioe) {
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
index 4ca0a7adbf..1b514056a4 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
@@ -93,7 +93,7 @@ public abstract class StorageVolume
NOT_INITIALIZED
}
- private VolumeState state;
+ private volatile VolumeState state;
// VERSION file properties
private String storageID; // id of the file system
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index f00ab2e6db..24fefc8d4c 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -1078,21 +1078,32 @@ public class KeyValueHandler extends Handler {
@Override
public void markContainerUnhealthy(Container container)
- throws IOException {
+ throws StorageContainerException {
container.writeLock();
try {
- if (container.getContainerState() != State.UNHEALTHY) {
- try {
- container.markContainerUnhealthy();
- } catch (IOException ex) {
- // explicitly catch IOException here since this operation
- // will fail if the Rocksdb metadata is corrupted.
- long id = container.getContainerData().getContainerID();
- LOG.warn("Unexpected error while marking container " + id
- + " as unhealthy", ex);
- } finally {
- sendICR(container);
- }
+ long containerID = container.getContainerData().getContainerID();
+ if (container.getContainerState() == State.UNHEALTHY) {
+ LOG.debug("Call to mark already unhealthy container {} as unhealthy",
+ containerID);
+ return;
+ }
+ // If the volume is unhealthy, no action is needed. The container has
+ // already been discarded and SCM notified. Once a volume is failed, it
+ // cannot be restored without a restart.
+ HddsVolume containerVolume = container.getContainerData().getVolume();
+ if (containerVolume.isFailed()) {
+ LOG.debug("Ignoring unhealthy container {} detected on an " +
+ "already failed volume {}", containerID, containerVolume);
+ return;
+ }
+
+ try {
+ container.markContainerUnhealthy();
+ } catch (StorageContainerException ex) {
+ LOG.warn("Unexpected error while marking container {} unhealthy",
+ containerID, ex);
+ } finally {
+ sendICR(container);
}
} finally {
container.writeUnlock();
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java
index dd9ba8212e..73539840a1 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java
@@ -70,6 +70,13 @@ public class BackgroundContainerDataScanner extends
@Override
public void scanContainer(Container<?> c) throws IOException {
+ // There is one background container data scanner per volume.
+ // If the volume fails, its scanning thread should terminate.
+ if (volume.isFailed()) {
+ shutdown("The volume has failed.");
+ return;
+ }
+
if (!shouldScan(c)) {
return;
}
@@ -111,8 +118,14 @@ public class BackgroundContainerDataScanner extends
@Override
public synchronized void shutdown() {
- this.canceler.cancel(
- String.format(NAME_FORMAT, volume) + " is shutting down");
+ shutdown("");
+ }
+
+ private synchronized void shutdown(String reason) {
+ String shutdownMessage = String.format(NAME_FORMAT, volume) + " is " +
+ "shutting down. " + reason;
+ LOG.info(shutdownMessage);
+ this.canceler.cancel(shutdownMessage);
super.shutdown();
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
index 4296d677c9..44f1bdd102 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
@@ -19,7 +19,9 @@ package org.apache.hadoop.ozone.container.ozoneimpl;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,6 +58,18 @@ public class BackgroundContainerMetadataScanner extends
@VisibleForTesting
@Override
public void scanContainer(Container<?> container) throws IOException {
+ // There is one background container metadata scanner per datanode.
+ // If this container's volume has failed, skip the container.
+ // The iterator returned by getContainerIterator may have stale results.
+ ContainerData data = container.getContainerData();
+ long containerID = data.getContainerID();
+ HddsVolume containerVolume = data.getVolume();
+ if (containerVolume.isFailed()) {
+ LOG.debug("Skipping scan of container {}. Its volume {} has failed.",
+ containerID, containerVolume);
+ return;
+ }
+
// Full data scan also does a metadata scan. If a full data scan was done
// recently, we can skip this metadata scan.
if (ContainerUtils.recentlyScanned(container, minScanGap, LOG)) {
@@ -66,8 +80,7 @@ public class BackgroundContainerMetadataScanner extends
// not a full scan.
if (!container.scanMetaData()) {
metrics.incNumUnHealthyContainers();
- controller.markContainerUnhealthy(
- container.getContainerData().getContainerID());
+ controller.markContainerUnhealthy(containerID);
}
metrics.incNumContainersScanned();
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
index f0bf31257e..460ad50736 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -76,13 +77,22 @@ public final class OnDemandContainerDataScanner {
}
private static boolean shouldScan(Container<?> container) {
+ long containerID = container.getContainerData().getContainerID();
if (instance == null) {
LOG.debug("Skipping on demand scan for container {} since scanner was " +
- "not initialized.", container.getContainerData().getContainerID());
+ "not initialized.", containerID);
+ return false;
}
- return instance != null &&
- container.shouldScanData() &&
- !ContainerUtils.recentlyScanned(container, instance.minScanGap, LOG);
+
+ HddsVolume containerVolume = container.getContainerData().getVolume();
+ if (containerVolume.isFailed()) {
+ LOG.debug("Skipping on demand scan for container {} since its volume {}"
+
+ " has failed.", containerID, containerVolume);
+ return false;
+ }
+
+ return !ContainerUtils.recentlyScanned(container, instance.minScanGap,
+ LOG) && container.shouldScanData();
}
public static Optional<Future<?>> scanContainer(Container<?> container) {
@@ -173,8 +183,9 @@ public final class OnDemandContainerDataScanner {
private synchronized void shutdownScanner() {
instance = null;
metrics.unregister();
- this.canceler.cancel("On-demand container" +
- " scanner is shutting down.");
+ String shutdownMessage = "On-demand container scanner is shutting down.";
+ LOG.info(shutdownMessage);
+ this.canceler.cancel(shutdownMessage);
if (!scanExecutor.isShutdown()) {
scanExecutor.shutdown();
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
index afc6c3ca64..3e0ddcd67f 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
@@ -173,20 +173,15 @@ public final class ContainerTestUtils {
public static void setupMockContainer(
Container<ContainerData> c, boolean shouldScanData,
boolean scanMetaDataSuccess, boolean scanDataSuccess,
- AtomicLong containerIdSeq) {
- setupMockContainer(c, shouldScanData, scanDataSuccess, containerIdSeq);
- Mockito.lenient().when(c.scanMetaData()).thenReturn(scanMetaDataSuccess);
- }
-
- public static void setupMockContainer(
- Container<ContainerData> c, boolean shouldScanData,
- boolean scanDataSuccess, AtomicLong containerIdSeq) {
+ AtomicLong containerIdSeq, HddsVolume vol) {
ContainerData data = mock(ContainerData.class);
when(data.getContainerID()).thenReturn(containerIdSeq.getAndIncrement());
when(c.getContainerData()).thenReturn(data);
when(c.shouldScanData()).thenReturn(shouldScanData);
when(c.scanData(any(DataTransferThrottler.class), any(Canceler.class)))
.thenReturn(scanDataSuccess);
+ Mockito.lenient().when(c.scanMetaData()).thenReturn(scanMetaDataSuccess);
+ when(c.getContainerData().getVolume()).thenReturn(vol);
}
public static KeyValueContainer setUpTestContainerUnderTmpDir(
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
index e14c67c0d6..3c9c41c777 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
@@ -25,11 +25,15 @@ import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.pipeline.MockPipeline;
import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
-import org.apache.hadoop.ozone.container.common.impl.TestHddsDispatcher;
+import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender;
import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,8 +60,15 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
public static final Logger LOG = LoggerFactory.getLogger(
TestKeyValueHandlerWithUnhealthyContainer.class);
+ private IncrementalReportSender<Container> mockIcrSender;
+
+ @BeforeEach
+ public void init() {
+ mockIcrSender = Mockito.mock(IncrementalReportSender.class);
+ }
+
@Test
- public void testRead() throws IOException {
+ public void testRead() {
KeyValueContainer container = getMockUnhealthyContainer();
KeyValueHandler handler = getDummyHandler();
@@ -69,7 +80,7 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
}
@Test
- public void testGetBlock() throws IOException {
+ public void testGetBlock() {
KeyValueContainer container = getMockUnhealthyContainer();
KeyValueHandler handler = getDummyHandler();
@@ -81,7 +92,7 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
}
@Test
- public void testGetCommittedBlockLength() throws IOException {
+ public void testGetCommittedBlockLength() {
KeyValueContainer container = getMockUnhealthyContainer();
KeyValueHandler handler = getDummyHandler();
@@ -94,7 +105,7 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
}
@Test
- public void testReadChunk() throws IOException {
+ public void testReadChunk() {
KeyValueContainer container = getMockUnhealthyContainer();
KeyValueHandler handler = getDummyHandler();
@@ -107,7 +118,7 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
}
@Test
- public void testGetSmallFile() throws IOException {
+ public void testGetSmallFile() {
KeyValueContainer container = getMockUnhealthyContainer();
KeyValueHandler handler = getDummyHandler();
@@ -137,9 +148,31 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
assertEquals(CONTAINER_INTERNAL_ERROR, response.getResult());
}
+ @Test
+ public void testMarkContainerUnhealthyInFailedVolume() throws IOException {
+ KeyValueHandler handler = getDummyHandler();
+ KeyValueContainerData mockContainerData =
mock(KeyValueContainerData.class);
+ HddsVolume mockVolume = mock(HddsVolume.class);
+ Mockito.when(mockContainerData.getVolume()).thenReturn(mockVolume);
+ KeyValueContainer container = new KeyValueContainer(
+ mockContainerData, new OzoneConfiguration());
+
+ // When volume is failed, the call to mark the container unhealthy should
+ // be ignored.
+ Mockito.when(mockVolume.isFailed()).thenReturn(true);
+ handler.markContainerUnhealthy(container);
+ Mockito.verify(mockIcrSender, Mockito.never()).send(Mockito.any());
+
+ // When volume is healthy, ICR should be sent when container is marked
+ // unhealthy.
+ Mockito.when(mockVolume.isFailed()).thenReturn(false);
+ handler.markContainerUnhealthy(container);
+ Mockito.verify(mockIcrSender, Mockito.atMostOnce()).send(Mockito.any());
+ }
+
// -- Helper methods below.
- private KeyValueHandler getDummyHandler() throws IOException {
+ private KeyValueHandler getDummyHandler() {
DatanodeDetails dnDetails = DatanodeDetails.newBuilder()
.setUuid(UUID.fromString(DATANODE_UUID))
.setHostName("dummyHost")
@@ -153,7 +186,7 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
stateMachine.getDatanodeDetails().getUuidString(),
mock(ContainerSet.class),
mock(MutableVolumeSet.class),
- mock(ContainerMetrics.class), TestHddsDispatcher.NO_OP_ICR_SENDER);
+ mock(ContainerMetrics.class), mockIcrSender);
}
private KeyValueContainer getMockUnhealthyContainer() {
@@ -165,5 +198,4 @@ public class TestKeyValueHandlerWithUnhealthyContainer {
.ContainerDataProto.newBuilder().setContainerID(1).build());
return new KeyValueContainer(containerData, new OzoneConfiguration());
}
-
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
index dc39ee2a9d..dea5d344df 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
@@ -19,7 +19,9 @@
*/
package org.apache.hadoop.ozone.container.ozoneimpl;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeEach;
import org.mockito.Mockito;
@@ -131,4 +133,34 @@ public class TestBackgroundContainerDataScanner extends
.updateDataScanTimestamp(
eq(corruptData.getContainerData().getContainerID()), any());
}
+
+ /**
+ * A datanode will have one background data scanner per volume. When the
+ * volume fails, the scanner thread should be terminated.
+ */
+ @Test
+ @Override
+ // Override findbugs warning about Mockito.verify
+ @SuppressFBWarnings("RV_RETURN_VALUE_IGNORED_NO_SIDE_EFFECT")
+ public void testWithVolumeFailure() throws Exception {
+ Mockito.when(vol.isFailed()).thenReturn(true);
+ // Run the scanner thread in the background. It should be terminated on
+ // the first iteration because the volume is unhealthy.
+ ContainerDataScannerMetrics metrics = scanner.getMetrics();
+ scanner.start();
+ GenericTestUtils.waitFor(() -> !scanner.isAlive(), 1000, 5000);
+
+ // Volume health should have been checked.
+ Mockito.verify(vol, atLeastOnce()).isFailed();
+ // No iterations should have been run.
+ assertEquals(0, metrics.getNumScanIterations());
+ assertEquals(0, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ // All containers were on the unhealthy volume, so they should not have
+ // been scanned.
+ Mockito.verify(healthy, never()).scanData(any(), any());
+ Mockito.verify(openContainer, never()).scanData(any(), any());
+ Mockito.verify(corruptData, never()).scanData(any(), any());
+ Mockito.verify(openCorruptMetadata, never()).scanData(any(), any());
+ }
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
index 8ae61a1a27..5fe9975bdd 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
@@ -19,7 +19,9 @@
*/
package org.apache.hadoop.ozone.container.ozoneimpl;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeEach;
import org.mockito.Mockito;
@@ -31,6 +33,7 @@ import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.never;
@@ -111,4 +114,42 @@ public class TestBackgroundContainerMetadataScanner extends
verifyContainerMarkedUnhealthy(openCorruptMetadata, atLeastOnce());
verifyContainerMarkedUnhealthy(openContainer, never());
}
+
+ /**
+ * A datanode will have one metadata scanner thread for the whole process.
+ * When a volume fails, any the containers queued for scanning in that volume
+ * should be skipped but the thread will continue to run.
+ */
+ @Test
+ @Override
+ // Override findbugs warning about Mockito.verify
+ @SuppressFBWarnings("RV_RETURN_VALUE_IGNORED_NO_SIDE_EFFECT")
+ public void testWithVolumeFailure() throws Exception {
+ Mockito.when(vol.isFailed()).thenReturn(true);
+
+ ContainerMetadataScannerMetrics metrics = scanner.getMetrics();
+ // Start metadata scanner thread in the background.
+ scanner.start();
+ // Wait for at least one iteration to complete.
+ GenericTestUtils.waitFor(() -> metrics.getNumScanIterations() >= 1, 1000,
+ 5000);
+ // Volume health should have been checked.
+ Mockito.verify(vol, atLeastOnce()).isFailed();
+ // Scanner should not have shutdown when it encountered the failed volume.
+ assertTrue(scanner.isAlive());
+
+ // Now explicitly shutdown the scanner.
+ scanner.shutdown();
+ // Wait for shutdown to finish.
+ GenericTestUtils.waitFor(() -> !scanner.isAlive(), 1000, 5000);
+
+ // All containers were on the failed volume, so they should not have
+ // been scanned.
+ assertEquals(0, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ Mockito.verify(healthy, never()).scanMetaData();
+ Mockito.verify(openContainer, never()).scanMetaData();
+ Mockito.verify(corruptData, never()).scanMetaData();
+ Mockito.verify(openCorruptMetadata, never()).scanMetaData();
+ }
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
index c9ad665996..08a5b65136 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
@@ -97,6 +97,9 @@ public abstract class TestContainerScannersAbstract {
@Test
public abstract void testScannerMetricsUnregisters() throws Exception;
+ @Test
+ public abstract void testWithVolumeFailure() throws Exception;
+
// HELPER METHODS
protected void setScannedTimestampOld(Container<ContainerData> container) {
@@ -129,20 +132,20 @@ public abstract class TestContainerScannersAbstract {
private ContainerController mockContainerController() {
// healthy container
ContainerTestUtils.setupMockContainer(healthy,
- true, true, true, CONTAINER_SEQ_ID);
+ true, true, true, CONTAINER_SEQ_ID, vol);
// Open container (only metadata can be scanned)
ContainerTestUtils.setupMockContainer(openContainer,
- false, true, false, CONTAINER_SEQ_ID);
+ false, true, false, CONTAINER_SEQ_ID, vol);
// unhealthy container (corrupt data)
ContainerTestUtils.setupMockContainer(corruptData,
- true, true, false, CONTAINER_SEQ_ID);
+ true, true, false, CONTAINER_SEQ_ID, vol);
// unhealthy container (corrupt metadata). To simulate container still
// being open while metadata is corrupted, shouldScanData will return
false.
ContainerTestUtils.setupMockContainer(openCorruptMetadata,
- false, false, false, CONTAINER_SEQ_ID);
+ false, false, false, CONTAINER_SEQ_ID, vol);
Collection<Container<?>> containers = Arrays.asList(
healthy, corruptData, openCorruptMetadata);
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
index 033688a693..8f1f2f2f90 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
@@ -190,6 +190,33 @@ public class TestOnDemandContainerDataScanner extends
verifyContainerMarkedUnhealthy(openContainer, never());
}
+ /**
+ * A datanode will have one on-demand scanner thread for the whole process.
+ * When a volume fails, any the containers queued for scanning in that volume
+ * should be skipped but the thread will continue to run and accept new
+ * containers to scan.
+ */
+ @Test
+ @Override
+ public void testWithVolumeFailure() throws Exception {
+ Mockito.when(vol.isFailed()).thenReturn(true);
+
+ OnDemandContainerDataScanner.init(conf, controller);
+ OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics();
+
+ scanContainer(healthy);
+ verifyContainerMarkedUnhealthy(healthy, never());
+ scanContainer(corruptData);
+ verifyContainerMarkedUnhealthy(corruptData, never());
+ scanContainer(openCorruptMetadata);
+ verifyContainerMarkedUnhealthy(openCorruptMetadata, never());
+ scanContainer(openContainer);
+ verifyContainerMarkedUnhealthy(openContainer, never());
+
+ assertEquals(0, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ }
+
private void scanContainer(Container<ContainerData> container)
throws Exception {
OnDemandContainerDataScanner.init(conf, controller);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]