This is an automated email from the ASF dual-hosted git repository.
avijayan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 25561f8 HDDS-5623. Add data validation checks on datanode. (#2542)
25561f8 is described below
commit 25561f8187f47077a6b5ace41ff67242f10a8fbc
Author: bshashikant <[email protected]>
AuthorDate: Thu Aug 26 23:42:39 2021 +0530
HDDS-5623. Add data validation checks on datanode. (#2542)
---
.../common/statemachine/DatanodeConfiguration.java | 21 ++++++++++++++++
.../server/ratis/ContainerStateMachine.java | 4 ++++
.../ozone/container/keyvalue/KeyValueHandler.java | 28 +++++++++++++++++++++-
.../container/keyvalue/helpers/ChunkUtils.java | 18 +++++++++++++-
.../keyvalue/impl/FilePerBlockStrategy.java | 9 +++++--
.../src/main/proto/DatanodeClientProtocol.proto | 1 +
6 files changed, 77 insertions(+), 4 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index e67ceb2..c483262 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -52,6 +52,8 @@ public class DatanodeConfiguration {
public static final String DISK_CHECK_TIMEOUT_KEY =
"hdds.datanode.disk.check.timeout";
+ static final boolean CHUNK_DATA_VALIDATION_CHECK_DEFAULT = false;
+
static final int REPLICATION_MAX_STREAMS_DEFAULT = 10;
static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 60;
@@ -214,6 +216,16 @@ public class DatanodeConfiguration {
)
private long diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;
+ @Config(key = "chunk.data.validation.check",
+ defaultValue = "false",
+ type = ConfigType.BOOLEAN,
+ tags = { DATANODE },
+ description = "Enable safety checks such as checksum validation"
+ + " for Ratis calls."
+ )
+ private boolean isChunkDataValidationCheck =
+ CHUNK_DATA_VALIDATION_CHECK_DEFAULT;
+
@PostConstruct
public void validate() {
if (replicationMaxStreams < 1) {
@@ -340,4 +352,13 @@ public class DatanodeConfiguration {
public void setBlockDeleteQueueLimit(int queueLimit) {
this.blockDeleteQueueLimit = queueLimit;
}
+
+ public boolean isChunkDataValidationCheck() {
+ return isChunkDataValidationCheck;
+ }
+
+ public void setChunkDataValidationCheck(boolean writeChunkValidationCheck) {
+ isChunkDataValidationCheck = writeChunkValidationCheck;
+ }
+
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index f74091c..fda9979 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -347,6 +347,8 @@ public class ContainerStateMachine extends BaseStateMachine
{
.setWriteChunk(commitWriteChunkProto)
.setTraceID(proto.getTraceID())
.build();
+ Preconditions.checkArgument(write.hasData());
+ Preconditions.checkArgument(!write.getData().isEmpty());
return TransactionContext.newBuilder()
.setClientRequest(request)
@@ -414,6 +416,7 @@ public class ContainerStateMachine extends BaseStateMachine
{
long startTime) {
final WriteChunkRequestProto write = requestProto.getWriteChunk();
RaftServer server = ratisServer.getServer();
+ Preconditions.checkArgument(!write.getData().isEmpty());
try {
if (server.getDivision(gid).getInfo().isLeader()) {
stateMachineDataCache.put(entryIndex, write.getData());
@@ -646,6 +649,7 @@ public class ContainerStateMachine extends BaseStateMachine
{
final CompletableFuture<ByteString> future = new CompletableFuture<>();
ByteString data = stateMachineDataCache.get(entry.getIndex());
if (data != null) {
+ Preconditions.checkArgument(!data.isEmpty());
future.complete(data);
return future;
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 163556f..e978bf8 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -49,7 +49,9 @@ import org.apache.hadoop.hdds.scm.ByteStringConversion;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.common.Checksum;
import org.apache.hadoop.ozone.common.ChunkBuffer;
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
@@ -61,6 +63,7 @@ import
org.apache.hadoop.ozone.container.common.impl.ContainerSet;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
import
org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy;
+import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext;
import
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext.WriteChunkStage;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
@@ -68,6 +71,7 @@ import
org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import
org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.ChunkUtils;
import org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl;
import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerFactory;
import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager;
@@ -116,6 +120,7 @@ public class KeyValueHandler extends Handler {
private final VolumeChoosingPolicy volumeChoosingPolicy;
private final long maxContainerSize;
private final Function<ByteBuffer, ByteString> byteBufferToByteString;
+ private final boolean validateChunkChecksumData;
// A lock that is held during container creation.
private final AutoCloseableLock containerCreationLock;
@@ -126,6 +131,8 @@ public class KeyValueHandler extends Handler {
super(config, datanodeId, contSet, volSet, metrics, icrSender);
containerType = ContainerType.KeyValueContainer;
blockManager = new BlockManagerImpl(config);
+ validateChunkChecksumData = conf.getObject(
+ DatanodeConfiguration.class).isChunkDataValidationCheck();
chunkManager = ChunkManagerFactory.createChunkManager(config, blockManager,
volSet);
try {
@@ -595,6 +602,12 @@ public class KeyValueHandler extends Handler {
data = chunkManager.readChunk(kvContainer, blockID, chunkInfo,
dispatcherContext);
+ // Validate data only if the read chunk is issued by Ratis for its
+ // internal logic.
+ // For client reads, the client is expected to validate.
+ if (dispatcherContext.isReadFromTmpFile()) {
+ validateChunkChecksumData(data, chunkInfo);
+ }
metrics.incContainerBytesStats(Type.ReadChunk, chunkInfo.getLen());
} catch (StorageContainerException ex) {
return ContainerUtils.logAndReturnError(LOG, ex, request);
@@ -664,6 +677,18 @@ public class KeyValueHandler extends Handler {
return getSuccessResponse(request);
}
+ private void validateChunkChecksumData(ChunkBuffer data, ChunkInfo info)
+ throws StorageContainerException {
+ if (validateChunkChecksumData) {
+ try {
+ Checksum.verifyChecksum(data.toByteString(byteBufferToByteString),
+ info.getChecksumData(), 0);
+ } catch (OzoneChecksumException ex) {
+ throw ChunkUtils.wrapInStorageContainerException(ex);
+ }
+ }
+ }
+
/**
* Handle Write Chunk operation. Calls ChunkManager to process the request.
*/
@@ -697,8 +722,8 @@ public class KeyValueHandler extends Handler {
stage == WriteChunkStage.COMBINED) {
data =
ChunkBuffer.wrap(writeChunk.getData().asReadOnlyByteBufferList());
+ validateChunkChecksumData(data, chunkInfo);
}
-
chunkManager
.writeChunk(kvContainer, blockID, chunkInfo, data,
dispatcherContext);
@@ -761,6 +786,7 @@ public class KeyValueHandler extends Handler {
// here. There is no need to maintain this info in openContainerBlockMap.
chunkManager
.writeChunk(kvContainer, blockID, chunkInfo, data,
dispatcherContext);
+ validateChunkChecksumData(data, chunkInfo);
chunkManager.finishWriteChunks(kvContainer, blockData);
List<ContainerProtos.ChunkInfo> chunks = new LinkedList<>();
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
index 773b36a..353dbcd 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
@@ -49,6 +49,7 @@ import com.google.common.annotations.VisibleForTesting;
import static java.nio.channels.FileChannel.open;
import static java.util.Collections.unmodifiableSet;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_WRITE_SIZE;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.IO_EXCEPTION;
@@ -351,7 +352,7 @@ public final class ChunkUtils {
}
}
- private static StorageContainerException wrapInStorageContainerException(
+ public static StorageContainerException wrapInStorageContainerException(
IOException e) {
ContainerProtos.Result result = translate(e);
return new StorageContainerException(e, result);
@@ -373,4 +374,19 @@ public final class ChunkUtils {
return CONTAINER_INTERNAL_ERROR;
}
+
+  /**
+   * Validates that the chunk's offset equals the current block file length,
+   * i.e. the chunk would append exactly at the end of the block file.
+   */
+ public static void validateChunkSize(File chunkFile, ChunkInfo chunkInfo)
+ throws StorageContainerException {
+ long offset = chunkInfo.getOffset();
+ long len = chunkFile.length();
+ if (chunkFile.length() != offset) {
+ throw new StorageContainerException(
+ "Chunk file offset " + offset + " does not match blockFile length " +
+ len, CHUNK_FILE_INCONSISTENCY);
+ }
+ }
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
index 3afcdf3..5fd23b5 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
@@ -135,8 +135,13 @@ public class FilePerBlockStrategy implements ChunkManager {
throw e;
}
- ChunkUtils.writeData(channel, chunkFile.getName(), data, offset, len,
- volume);
+    // check that the chunk offset matches the block file length (only when it's not an overwrite)
+ if (!overwrite) {
+ ChunkUtils.validateChunkSize(chunkFile, info);
+ }
+
+ ChunkUtils
+ .writeData(channel, chunkFile.getName(), data, offset, len, volume);
containerData.updateWriteStats(len, overwrite);
}
diff --git
a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 31947db..9f57e14 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -145,6 +145,7 @@ enum Result {
CONTAINER_MISSING = 40;
BLOCK_TOKEN_VERIFICATION_FAILED = 41;
ERROR_IN_DB_SYNC = 42;
+ CHUNK_FILE_INCONSISTENCY = 43;
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]