This is an automated email from the ASF dual-hosted git repository.
sshenoy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b85e6501d8 HDDS-8447. Datanodes should not process container deletes
for failed volumes. (#4901)
b85e6501d8 is described below
commit b85e6501d8b20d26a542f5ba68fd878a6029859f
Author: Sadanand Shenoy <[email protected]>
AuthorDate: Wed Jun 28 11:49:20 2023 +0530
HDDS-8447. Datanodes should not process container deletes for failed
volumes. (#4901)
---
.../ozone/container/common/volume/HddsVolume.java | 7 ++-
.../ozone/container/keyvalue/KeyValueHandler.java | 49 ++++++++++++----
.../container/keyvalue/TestKeyValueHandler.java | 66 ++++++++++++++++++----
3 files changed, 100 insertions(+), 22 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index bd840ba300..bcbfe37554 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -25,10 +25,10 @@ import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
-
import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
import org.apache.hadoop.ozone.OzoneConsts;
@@ -363,6 +363,11 @@ public class HddsVolume extends StorageVolume {
return this.deletedContainerDir;
}
+ @VisibleForTesting
+ public void setDeletedContainerDir(File deletedContainerDir) {
+ this.deletedContainerDir = deletedContainerDir;
+ }
+
public boolean isDbLoaded() {
return dbLoaded.get();
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 5bfb77a4bc..279db24c4b 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -75,6 +75,7 @@ import
org.apache.hadoop.ozone.container.common.report.IncrementalReportSender;
import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext;
import
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext.WriteChunkStage;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import
org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
@@ -126,7 +127,7 @@ import org.slf4j.LoggerFactory;
*/
public class KeyValueHandler extends Handler {
- private static final Logger LOG = LoggerFactory.getLogger(
+ public static final Logger LOG = LoggerFactory.getLogger(
KeyValueHandler.class);
private final BlockManager blockManager;
@@ -1288,6 +1289,16 @@ public class KeyValueHandler extends Handler {
throws StorageContainerException {
container.writeLock();
try {
+ if (container.getContainerData().getVolume().isFailed()) {
+ // if the volume in which the container resides fails
+ // don't attempt to delete/move it. When a volume fails,
+ // failedVolumeListener will pick it up and clear the container
+ // from the container set.
+ LOG.info("Delete container issued on containerID {} which is in a " +
+ "failed volume. Skipping", container.getContainerData()
+ .getContainerID());
+ return;
+ }
// If force is false, we check container state.
if (!force) {
// Check if container is open
@@ -1325,12 +1336,15 @@ public class KeyValueHandler extends Handler {
// Rename container location
try {
- KeyValueContainerUtil.moveToDeletedContainerDir(
- keyValueContainerData, hddsVolume);
- } catch (IOException ex) {
- LOG.error("Failed to move deleted container under {}",
- hddsVolume.getDeletedContainerDir(), ex);
- throw new StorageContainerException("Moving deleted container
failed",
+
KeyValueContainerUtil.moveToDeletedContainerDir(keyValueContainerData,
+ hddsVolume);
+ } catch (IOException ioe) {
+ LOG.error("Failed to move container under " + hddsVolume
+ .getDeletedContainerDir());
+ String errorMsg =
+ "Failed to move container" + container.getContainerData()
+ .getContainerID();
+ triggerVolumeScanAndThrowException(container, errorMsg,
CONTAINER_INTERNAL_ERROR);
}
@@ -1352,9 +1366,11 @@ public class KeyValueHandler extends Handler {
// empty as a defensive check.
LOG.error("Could not determine if the container {} is empty",
container.getContainerData().getContainerID(), e);
- throw new StorageContainerException("Could not determine if container "
- + container.getContainerData().getContainerID() +
- " is empty", DELETE_ON_NON_EMPTY_CONTAINER);
+ String errorMsg =
+ "Failed to read container dir" + container.getContainerData()
+ .getContainerID();
+ triggerVolumeScanAndThrowException(container, errorMsg,
+ CONTAINER_INTERNAL_ERROR);
} finally {
container.writeUnlock();
}
@@ -1363,4 +1379,17 @@ public class KeyValueHandler extends Handler {
container.getContainerData().setState(State.DELETED);
sendICR(container);
}
+
+ private void triggerVolumeScanAndThrowException(Container container,
+ String msg, ContainerProtos.Result result)
+ throws StorageContainerException {
+ // Trigger a volume scan as exception occurred.
+ StorageVolumeUtil.onFailure(container.getContainerData().getVolume());
+ throw new StorageContainerException(msg, result);
+ }
+
+ public static Logger getLogger() {
+ return LOG;
+ }
+
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
index a841735cd7..31c9173bd8 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.fs.FileUtil;
@@ -90,6 +91,7 @@ public class TestKeyValueHandler {
private static final String DATANODE_UUID = UUID.randomUUID().toString();
private static final long DUMMY_CONTAINER_ID = 9999;
+ private static final String DUMMY_PATH = "/dummy/dir/doesnt/exist";
private final ContainerLayoutVersion layout;
@@ -351,21 +353,24 @@ public class TestKeyValueHandler {
ContainerProtos.Result.INVALID_CONTAINER_STATE, response.getResult());
}
+ @SuppressFBWarnings("DMI_HARDCODED_ABSOLUTE_FILENAME")
@Test
public void testDeleteContainer() throws IOException {
final String testDir = GenericTestUtils.getTempPath(
TestKeyValueHandler.class.getSimpleName() +
"-" + UUID.randomUUID().toString());
try {
+ // Case 1 : Regular container delete
final long containerID = 1L;
final String clusterId = UUID.randomUUID().toString();
final String datanodeId = UUID.randomUUID().toString();
final ConfigurationSource conf = new OzoneConfiguration();
final ContainerSet containerSet = new ContainerSet(1000);
- final VolumeSet volumeSet = Mockito.mock(VolumeSet.class);
+ final MutableVolumeSet volumeSet = Mockito.mock(MutableVolumeSet.class);
HddsVolume hddsVolume = new HddsVolume.Builder(testDir).conf(conf)
.clusterID(clusterId).datanodeUuid(datanodeId)
+ .volumeSet(volumeSet)
.build();
hddsVolume.format(clusterId);
hddsVolume.createWorkingDir(clusterId, null);
@@ -389,16 +394,7 @@ public class TestKeyValueHandler {
kvHandler.setClusterID(clusterId);
final ContainerCommandRequestProto createContainer =
- ContainerCommandRequestProto.newBuilder()
- .setCmdType(ContainerProtos.Type.CreateContainer)
- .setDatanodeUuid(datanodeId)
- .setCreateContainer(
- ContainerProtos.CreateContainerRequestProto.newBuilder()
- .setContainerType(ContainerType.KeyValueContainer)
- .build())
- .setContainerID(containerID)
- .setPipelineID(UUID.randomUUID().toString())
- .build();
+ createContainerRequest(datanodeId, containerID);
kvHandler.handleCreateContainer(createContainer, null);
Assert.assertEquals(1, icrReceived.get());
@@ -412,8 +408,56 @@ public class TestKeyValueHandler {
hddsVolume.getDeletedContainerDir().listFiles();
assertNotNull(deletedContainers);
Assertions.assertEquals(0, deletedContainers.length);
+
+ // Case 2 : failed move of container dir to tmp location should trigger
+ // a volume scan
+
+ final long container2ID = 2L;
+
+ final ContainerCommandRequestProto createContainer2 =
+ createContainerRequest(datanodeId, container2ID);
+
+ kvHandler.handleCreateContainer(createContainer2, null);
+
+ Assert.assertEquals(3, icrReceived.get());
+ Assert.assertNotNull(containerSet.getContainer(container2ID));
+ File deletedContainerDir = hddsVolume.getDeletedContainerDir();
+ // to simulate failed move
+ File dummyDir = new File(DUMMY_PATH);
+ hddsVolume.setDeletedContainerDir(dummyDir);
+ try {
+ kvHandler.deleteContainer(containerSet.getContainer(container2ID),
+ true);
+ } catch (StorageContainerException sce) {
+ Assert.assertTrue(
+ sce.getMessage().contains("Failed to move container"));
+ }
+ Mockito.verify(volumeSet).checkVolumeAsync(hddsVolume);
+ // cleanup
+ hddsVolume.setDeletedContainerDir(deletedContainerDir);
+
+ // Case 3: Delete Container on a failed volume
+ hddsVolume.failVolume();
+ GenericTestUtils.LogCapturer kvHandlerLogs =
+
GenericTestUtils.LogCapturer.captureLogs(KeyValueHandler.getLogger());
+ kvHandler.deleteContainer(containerSet.getContainer(container2ID), true);
+ String expectedLog =
+ "Delete container issued on containerID 2 which is " +
+ "in a failed volume";
+ Assert.assertTrue(kvHandlerLogs.getOutput().contains(expectedLog));
} finally {
FileUtils.deleteDirectory(new File(testDir));
}
}
+
+ private static ContainerCommandRequestProto createContainerRequest(
+ String datanodeId, long containerID) {
+ return ContainerCommandRequestProto.newBuilder()
+ .setCmdType(ContainerProtos.Type.CreateContainer)
+ .setDatanodeUuid(datanodeId).setCreateContainer(
+ ContainerProtos.CreateContainerRequestProto.newBuilder()
+ .setContainerType(ContainerType.KeyValueContainer).build())
+
.setContainerID(containerID).setPipelineID(UUID.randomUUID().toString())
+ .build();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]