This is an automated email from the ASF dual-hosted git repository.
sumitagrawal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 83f3fd9d03 HDDS-8927. Metadata scanner should not scan unhealthy
containers. (#4976)
83f3fd9d03 is described below
commit 83f3fd9d03cbae31a71f391545bc18b729f43e0d
Author: Ethan Rose <[email protected]>
AuthorDate: Mon Jun 26 09:44:03 2023 -0700
HDDS-8927. Metadata scanner should not scan unhealthy containers. (#4976)
---
.../container/common/interfaces/Container.java | 6 +++
.../container/keyvalue/KeyValueContainer.java | 18 +++++++-
.../BackgroundContainerMetadataScanner.java | 11 +++--
.../ozone/container/common/ContainerTestUtils.java | 1 +
.../TestBackgroundContainerDataScanner.java | 46 +++++++++++++++++++
.../TestBackgroundContainerMetadataScanner.java | 48 ++++++++++++++++++++
.../ozoneimpl/TestContainerScannersAbstract.java | 53 ++++++++++++++++++++--
.../TestOnDemandContainerDataScanner.java | 45 +++++++++++++++++-
8 files changed, 218 insertions(+), 10 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java
index 9dfd94dcb4..9a17d1fc71 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java
@@ -163,6 +163,12 @@ public interface Container<CONTAINERDATA extends
ContainerData> extends RwLock {
*/
long getBlockCommitSequenceId();
+ /**
+ * Returns if the container metadata should be checked. The result depends
+ * on the state of the container.
+ */
+ boolean shouldScanMetadata();
+
/**
* check and report the structural integrity of the container.
* @return true if the integrity checks pass
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index 079da021a9..702a9315ec 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -862,6 +862,19 @@ public class KeyValueContainer implements
Container<KeyValueContainerData> {
*/
public File getContainerDBFile() {
return KeyValueContainerLocationUtil.getContainerDBFile(containerData);
+
+ }
+
+ @Override
+ public boolean shouldScanMetadata() {
+ boolean shouldScan =
+ getContainerState() != ContainerDataProto.State.UNHEALTHY;
+ if (!shouldScan && LOG.isDebugEnabled()) {
+ LOG.debug("Container {} in state {} should not have its metadata " +
+ "scanned.",
+ containerData.getContainerID(), containerData.getState());
+ }
+ return shouldScan;
}
@Override
@@ -876,12 +889,13 @@ public class KeyValueContainer implements
Container<KeyValueContainerData> {
@Override
public boolean shouldScanData() {
boolean shouldScan =
- containerData.getState() == ContainerDataProto.State.CLOSED
- || containerData.getState() == ContainerDataProto.State.QUASI_CLOSED;
+ getContainerState() == ContainerDataProto.State.CLOSED
+ || getContainerState() == ContainerDataProto.State.QUASI_CLOSED;
if (!shouldScan && LOG.isDebugEnabled()) {
LOG.debug("Container {} in state {} should not have its data scanned.",
containerData.getContainerID(), containerData.getState());
}
+
return shouldScan;
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
index 44f1bdd102..ee67b68d3d 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java
@@ -70,9 +70,7 @@ public class BackgroundContainerMetadataScanner extends
return;
}
- // Full data scan also does a metadata scan. If a full data scan was done
- // recently, we can skip this metadata scan.
- if (ContainerUtils.recentlyScanned(container, minScanGap, LOG)) {
+ if (!shouldScan(container)) {
return;
}
@@ -89,4 +87,11 @@ public class BackgroundContainerMetadataScanner extends
public ContainerMetadataScannerMetrics getMetrics() {
return this.metrics;
}
+
+ private boolean shouldScan(Container<?> container) {
+ // Full data scan also does a metadata scan. If a full data scan was done
+ // recently, we can skip this metadata scan.
+ return container.shouldScanMetadata() &&
+ !ContainerUtils.recentlyScanned(container, minScanGap, LOG);
+ }
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
index 71310e3032..513197b10b 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
@@ -180,6 +180,7 @@ public final class ContainerTestUtils {
when(c.shouldScanData()).thenReturn(shouldScanData);
when(c.scanData(any(DataTransferThrottler.class), any(Canceler.class)))
.thenReturn(scanDataSuccess);
+ when(c.shouldScanMetadata()).thenReturn(true);
Mockito.lenient().when(c.scanMetaData()).thenReturn(scanMetaDataSuccess);
when(c.getContainerData().getVolume()).thenReturn(vol);
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
index dea5d344df..eb644d0350 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
@@ -20,7 +20,10 @@
package org.apache.hadoop.ozone.container.ozoneimpl;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+import org.apache.hadoop.hdfs.util.Canceler;
+import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeEach;
@@ -30,13 +33,17 @@ import org.mockito.quality.Strictness;
import java.util.Optional;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.atMostOnce;
import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.when;
/**
* Unit tests for the background container data scanner.
@@ -134,6 +141,45 @@ public class TestBackgroundContainerDataScanner extends
eq(corruptData.getContainerData().getContainerID()), any());
}
+ @Test
+ @Override
+ public void testUnhealthyContainerNotRescanned() throws Exception {
+ Container<?> unhealthy = mockKeyValueContainer();
+ when(unhealthy.scanMetaData()).thenReturn(true);
+ when(unhealthy.scanData(any(DataTransferThrottler.class),
+ any(Canceler.class))).thenReturn(false);
+
+ setContainers(unhealthy, healthy);
+
+ // First iteration should find the unhealthy container.
+ scanner.runIteration();
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ ContainerDataScannerMetrics metrics = scanner.getMetrics();
+ assertEquals(1, metrics.getNumScanIterations());
+ assertEquals(2, metrics.getNumContainersScanned());
+ assertEquals(1, metrics.getNumUnHealthyContainers());
+
+ // The unhealthy container should have been moved to the unhealthy state.
+ Mockito.verify(unhealthy.getContainerData(), atMostOnce())
+ .setState(UNHEALTHY);
+ // Update the mock to reflect this.
+ Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY);
+ assertFalse(unhealthy.shouldScanData());
+
+ // Clear metrics to check the next run.
+ metrics.resetNumContainersScanned();
+ metrics.resetNumUnhealthyContainers();
+
+ scanner.runIteration();
+ // The only invocation of unhealthy on this container should have been from
+ // the previous scan.
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ // This iteration should skip the already unhealthy container.
+ assertEquals(2, metrics.getNumScanIterations());
+ assertEquals(1, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ }
+
/**
* A datanode will have one background data scanner per volume. When the
* volume fails, the scanner thread should be terminated.
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
index 5fe9975bdd..712e89fde4 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
@@ -20,7 +20,10 @@
package org.apache.hadoop.ozone.container.ozoneimpl;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+import org.apache.hadoop.hdfs.util.Canceler;
+import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeEach;
@@ -30,12 +33,17 @@ import org.mockito.quality.Strictness;
import java.util.Optional;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.atMostOnce;
import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.when;
/**
* Unit tests for the background container metadata scanner.
@@ -115,6 +123,46 @@ public class TestBackgroundContainerMetadataScanner extends
verifyContainerMarkedUnhealthy(openContainer, never());
}
+ @Test
+ @Override
+ public void testUnhealthyContainerNotRescanned() throws Exception {
+ Container<?> unhealthy = mockKeyValueContainer();
+ when(unhealthy.scanMetaData()).thenReturn(false);
+ when(unhealthy.scanData(
+ any(DataTransferThrottler.class), any(Canceler.class)))
+ .thenReturn(true);
+
+ setContainers(unhealthy, healthy);
+
+ // First iteration should find the unhealthy container.
+ scanner.runIteration();
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ ContainerMetadataScannerMetrics metrics = scanner.getMetrics();
+ assertEquals(1, metrics.getNumScanIterations());
+ assertEquals(2, metrics.getNumContainersScanned());
+ assertEquals(1, metrics.getNumUnHealthyContainers());
+
+ // The unhealthy container should have been moved to the unhealthy state.
+ Mockito.verify(unhealthy.getContainerData(), atMostOnce())
+ .setState(UNHEALTHY);
+ // Update the mock to reflect this.
+ Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY);
+ assertFalse(unhealthy.shouldScanMetadata());
+
+ // Clear metrics to check the next run.
+ metrics.resetNumContainersScanned();
+ metrics.resetNumUnhealthyContainers();
+
+ scanner.runIteration();
+ // The only invocation of unhealthy on this container should have been from
+ // the previous scan.
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ // This iteration should skip the already unhealthy container.
+ assertEquals(2, metrics.getNumScanIterations());
+ assertEquals(1, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ }
+
/**
* A datanode will have one metadata scanner thread for the whole process.
* When a volume fails, any the containers queued for scanning in that volume
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
index 08a5b65136..504d4304f1 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
@@ -21,6 +21,8 @@ import
org.apache.hadoop.ozone.container.common.ContainerTestUtils;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.Mockito;
@@ -30,13 +32,17 @@ import org.mockito.verification.VerificationMode;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
import static org.apache.hadoop.hdds.conf.OzoneConfiguration.newInstanceOf;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED;
import static
org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration.CONTAINER_SCAN_MIN_GAP_DEFAULT;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -68,7 +74,11 @@ public abstract class TestContainerScannersAbstract {
protected ContainerScannerConfiguration conf;
protected ContainerController controller;
+
+ private Collection<Container<?>> containers;
+
public void setup() {
+ containers = new ArrayList<>();
conf = newInstanceOf(ContainerScannerConfiguration.class);
conf.setMetadataScanInterval(0);
conf.setDataScanInterval(0);
@@ -100,6 +110,9 @@ public abstract class TestContainerScannersAbstract {
@Test
public abstract void testWithVolumeFailure() throws Exception;
+ @Test
+ public abstract void testUnhealthyContainerNotRescanned() throws Exception;
+
// HELPER METHODS
protected void setScannedTimestampOld(Container<ContainerData> container) {
@@ -123,12 +136,47 @@ public abstract class TestContainerScannersAbstract {
}
protected void verifyContainerMarkedUnhealthy(
- Container<ContainerData> container, VerificationMode invocationTimes)
+ Container<?> container, VerificationMode invocationTimes)
throws Exception {
Mockito.verify(controller, invocationTimes).markContainerUnhealthy(
container.getContainerData().getContainerID());
}
+ /**
+ * Mock a KeyValueContainer implementation instead of a container
+ * interface like ContainerTestUtils#setupMockContainer.
+ * This allows testing that the shouldScanData method skips unhealthy
+ * containers.
+ */
+ protected Container<?> mockKeyValueContainer() {
+ KeyValueContainer unhealthy = Mockito.mock(KeyValueContainer.class);
+
+ KeyValueContainerData data = mock(KeyValueContainerData.class);
+ when(data.getContainerID()).thenReturn(CONTAINER_SEQ_ID.incrementAndGet());
+ when(unhealthy.getContainerData()).thenReturn(data);
+ when(unhealthy.getContainerState()).thenReturn(CLOSED);
+ // The above mocks should be enough for the scanners to call this method
+ // and test it.
+ when(unhealthy.shouldScanData()).thenCallRealMethod();
+ assertTrue(unhealthy.shouldScanData());
+ when(unhealthy.shouldScanMetadata()).thenCallRealMethod();
+ assertTrue(unhealthy.shouldScanMetadata());
+
+ when(unhealthy.getContainerData().getVolume()).thenReturn(vol);
+
+ return unhealthy;
+ }
+
+ /**
+ * Add a container to be returned by the mock ContainerController.
+ */
+ protected void setContainers(Container<?>... containers) {
+ this.containers = Arrays.stream(containers).collect(Collectors.toList());
+ when(controller.getContainers(vol))
+ .thenAnswer(i -> this.containers.iterator());
+ when(controller.getContainers()).thenReturn(this.containers);
+ }
+
private ContainerController mockContainerController() {
// healthy container
ContainerTestUtils.setupMockContainer(healthy,
@@ -147,8 +195,7 @@ public abstract class TestContainerScannersAbstract {
ContainerTestUtils.setupMockContainer(openCorruptMetadata,
false, false, false, CONTAINER_SEQ_ID, vol);
- Collection<Container<?>> containers = Arrays.asList(
- healthy, corruptData, openCorruptMetadata);
+ containers.addAll(Arrays.asList(healthy, corruptData,
openCorruptMetadata));
ContainerController mock = mock(ContainerController.class);
when(mock.getContainers(vol)).thenReturn(containers.iterator());
when(mock.getContainers()).thenReturn(containers);
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
index 8f1f2f2f90..e89cecfae0 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java
@@ -20,8 +20,9 @@
package org.apache.hadoop.ozone.container.ozoneimpl;
import org.apache.commons.compress.utils.Lists;
+import org.apache.hadoop.hdfs.util.Canceler;
+import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
-import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -38,13 +39,17 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.atMostOnce;
import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.when;
/**
* Unit tests for the on-demand container scanner.
@@ -217,7 +222,43 @@ public class TestOnDemandContainerDataScanner extends
assertEquals(0, metrics.getNumUnHealthyContainers());
}
- private void scanContainer(Container<ContainerData> container)
+ @Test
+ @Override
+ public void testUnhealthyContainerNotRescanned() throws Exception {
+ Container<?> unhealthy = mockKeyValueContainer();
+ when(unhealthy.scanMetaData()).thenReturn(true);
+ when(unhealthy.scanData(
+ any(DataTransferThrottler.class), any(Canceler.class)))
+ .thenReturn(false);
+
+ // First iteration should find the unhealthy container.
+ scanContainer(unhealthy);
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics();
+ assertEquals(1, metrics.getNumContainersScanned());
+ assertEquals(1, metrics.getNumUnHealthyContainers());
+
+ // The unhealthy container should have been moved to the unhealthy state.
+ Mockito.verify(unhealthy.getContainerData(), atMostOnce())
+ .setState(UNHEALTHY);
+ // Update the mock to reflect this.
+ Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY);
+ assertFalse(unhealthy.shouldScanData());
+
+ // Clear metrics to check the next run.
+ metrics.resetNumContainersScanned();
+ metrics.resetNumUnhealthyContainers();
+
+ scanContainer(unhealthy);
+ // The only invocation of unhealthy on this container should have been from
+ // the previous scan.
+ verifyContainerMarkedUnhealthy(unhealthy, atMostOnce());
+ // This iteration should skip the already unhealthy container.
+ assertEquals(0, metrics.getNumContainersScanned());
+ assertEquals(0, metrics.getNumUnHealthyContainers());
+ }
+
+ private void scanContainer(Container<?> container)
throws Exception {
OnDemandContainerDataScanner.init(conf, controller);
Optional<Future<?>> scanFuture =
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]