This is an automated email from the ASF dual-hosted git repository.

avijayan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 25561f8  HDDS-5623. Add data validation checks on datanode. (#2542)
25561f8 is described below

commit 25561f8187f47077a6b5ace41ff67242f10a8fbc
Author: bshashikant <[email protected]>
AuthorDate: Thu Aug 26 23:42:39 2021 +0530

    HDDS-5623. Add data validation checks on datanode. (#2542)
---
 .../common/statemachine/DatanodeConfiguration.java | 21 ++++++++++++++++
 .../server/ratis/ContainerStateMachine.java        |  4 ++++
 .../ozone/container/keyvalue/KeyValueHandler.java  | 28 +++++++++++++++++++++-
 .../container/keyvalue/helpers/ChunkUtils.java     | 18 +++++++++++++-
 .../keyvalue/impl/FilePerBlockStrategy.java        |  9 +++++--
 .../src/main/proto/DatanodeClientProtocol.proto    |  1 +
 6 files changed, 77 insertions(+), 4 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index e67ceb2..c483262 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -52,6 +52,8 @@ public class DatanodeConfiguration {
   public static final String DISK_CHECK_TIMEOUT_KEY =
       "hdds.datanode.disk.check.timeout";
 
+  static final boolean CHUNK_DATA_VALIDATION_CHECK_DEFAULT = false;
+
   static final int REPLICATION_MAX_STREAMS_DEFAULT = 10;
 
   static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 60;
@@ -214,6 +216,16 @@ public class DatanodeConfiguration {
   )
   private long diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;
 
+  @Config(key = "chunk.data.validation.check",
+      defaultValue = "false",
+      type = ConfigType.BOOLEAN,
+      tags = { DATANODE },
+      description = "Enable safety checks such as checksum validation"
+          + " for Ratis calls."
+  )
+  private boolean isChunkDataValidationCheck =
+      CHUNK_DATA_VALIDATION_CHECK_DEFAULT;
+
   @PostConstruct
   public void validate() {
     if (replicationMaxStreams < 1) {
@@ -340,4 +352,13 @@ public class DatanodeConfiguration {
   public void setBlockDeleteQueueLimit(int queueLimit) {
     this.blockDeleteQueueLimit = queueLimit;
   }
+
+  public boolean isChunkDataValidationCheck() {
+    return isChunkDataValidationCheck;
+  }
+
+  public void setChunkDataValidationCheck(boolean writeChunkValidationCheck) {
+    isChunkDataValidationCheck = writeChunkValidationCheck;
+  }
+
 }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index f74091c..fda9979 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -347,6 +347,8 @@ public class ContainerStateMachine extends BaseStateMachine 
{
               .setWriteChunk(commitWriteChunkProto)
               .setTraceID(proto.getTraceID())
               .build();
+      Preconditions.checkArgument(write.hasData());
+      Preconditions.checkArgument(!write.getData().isEmpty());
 
       return TransactionContext.newBuilder()
           .setClientRequest(request)
@@ -414,6 +416,7 @@ public class ContainerStateMachine extends BaseStateMachine 
{
       long startTime) {
     final WriteChunkRequestProto write = requestProto.getWriteChunk();
     RaftServer server = ratisServer.getServer();
+    Preconditions.checkArgument(!write.getData().isEmpty());
     try {
       if (server.getDivision(gid).getInfo().isLeader()) {
         stateMachineDataCache.put(entryIndex, write.getData());
@@ -646,6 +649,7 @@ public class ContainerStateMachine extends BaseStateMachine 
{
         final CompletableFuture<ByteString> future = new CompletableFuture<>();
         ByteString data = stateMachineDataCache.get(entry.getIndex());
         if (data != null) {
+          Preconditions.checkArgument(!data.isEmpty());
           future.complete(data);
           return future;
         }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 163556f..e978bf8 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -49,7 +49,9 @@ import org.apache.hadoop.hdds.scm.ByteStringConversion;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.common.Checksum;
 import org.apache.hadoop.ozone.common.ChunkBuffer;
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
 import org.apache.hadoop.ozone.common.utils.BufferUtils;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
@@ -61,6 +63,7 @@ import 
org.apache.hadoop.ozone.container.common.impl.ContainerSet;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
 import org.apache.hadoop.ozone.container.common.interfaces.Handler;
 import 
org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy;
+import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
 import 
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext;
 import 
org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext.WriteChunkStage;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
@@ -68,6 +71,7 @@ import 
org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import 
org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
 import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
 import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.ChunkUtils;
 import org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl;
 import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerFactory;
 import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager;
@@ -116,6 +120,7 @@ public class KeyValueHandler extends Handler {
   private final VolumeChoosingPolicy volumeChoosingPolicy;
   private final long maxContainerSize;
   private final Function<ByteBuffer, ByteString> byteBufferToByteString;
+  private final boolean validateChunkChecksumData;
 
   // A lock that is held during container creation.
   private final AutoCloseableLock containerCreationLock;
@@ -126,6 +131,8 @@ public class KeyValueHandler extends Handler {
     super(config, datanodeId, contSet, volSet, metrics, icrSender);
     containerType = ContainerType.KeyValueContainer;
     blockManager = new BlockManagerImpl(config);
+    validateChunkChecksumData = conf.getObject(
+        DatanodeConfiguration.class).isChunkDataValidationCheck();
     chunkManager = ChunkManagerFactory.createChunkManager(config, blockManager,
         volSet);
     try {
@@ -595,6 +602,12 @@ public class KeyValueHandler extends Handler {
 
       data = chunkManager.readChunk(kvContainer, blockID, chunkInfo,
           dispatcherContext);
+      // Validate data only if the read chunk is issued by Ratis for its
+      // internal logic. For client reads, the client is expected to
+      // validate the data itself.
+      if (dispatcherContext.isReadFromTmpFile()) {
+        validateChunkChecksumData(data, chunkInfo);
+      }
       metrics.incContainerBytesStats(Type.ReadChunk, chunkInfo.getLen());
     } catch (StorageContainerException ex) {
       return ContainerUtils.logAndReturnError(LOG, ex, request);
@@ -664,6 +677,18 @@ public class KeyValueHandler extends Handler {
     return getSuccessResponse(request);
   }
 
+  private void validateChunkChecksumData(ChunkBuffer data, ChunkInfo info)
+      throws StorageContainerException {
+    if (validateChunkChecksumData) {
+      try {
+        Checksum.verifyChecksum(data.toByteString(byteBufferToByteString),
+            info.getChecksumData(), 0);
+      } catch (OzoneChecksumException ex) {
+        throw ChunkUtils.wrapInStorageContainerException(ex);
+      }
+    }
+  }
+
   /**
    * Handle Write Chunk operation. Calls ChunkManager to process the request.
    */
@@ -697,8 +722,8 @@ public class KeyValueHandler extends Handler {
           stage == WriteChunkStage.COMBINED) {
         data =
             ChunkBuffer.wrap(writeChunk.getData().asReadOnlyByteBufferList());
+        validateChunkChecksumData(data, chunkInfo);
       }
-
       chunkManager
           .writeChunk(kvContainer, blockID, chunkInfo, data, 
dispatcherContext);
 
@@ -761,6 +786,7 @@ public class KeyValueHandler extends Handler {
       // here. There is no need to maintain this info in openContainerBlockMap.
       chunkManager
           .writeChunk(kvContainer, blockID, chunkInfo, data, 
dispatcherContext);
+      validateChunkChecksumData(data, chunkInfo);
       chunkManager.finishWriteChunks(kvContainer, blockData);
 
       List<ContainerProtos.ChunkInfo> chunks = new LinkedList<>();
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
index 773b36a..353dbcd 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
@@ -49,6 +49,7 @@ import com.google.common.annotations.VisibleForTesting;
 
 import static java.nio.channels.FileChannel.open;
 import static java.util.Collections.unmodifiableSet;
+import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY;
 import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
 import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_WRITE_SIZE;
 import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.IO_EXCEPTION;
@@ -351,7 +352,7 @@ public final class ChunkUtils {
     }
   }
 
-  private static StorageContainerException wrapInStorageContainerException(
+  public static StorageContainerException wrapInStorageContainerException(
       IOException e) {
     ContainerProtos.Result result = translate(e);
     return new StorageContainerException(e, result);
@@ -373,4 +374,19 @@ public final class ChunkUtils {
 
     return CONTAINER_INTERNAL_ERROR;
   }
+
+  /**
+   * Validates that the chunk offset equals the current block file length;
+   * throws a StorageContainerException (CHUNK_FILE_INCONSISTENCY) otherwise.
+   */
+  public static void validateChunkSize(File chunkFile, ChunkInfo chunkInfo)
+      throws StorageContainerException {
+    long offset = chunkInfo.getOffset();
+    long len = chunkFile.length();
+    if (chunkFile.length() != offset) {
+      throw new StorageContainerException(
+          "Chunk file offset " + offset + " does not match blockFile length " +
+          len, CHUNK_FILE_INCONSISTENCY);
+    }
+  }
 }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
index 3afcdf3..5fd23b5 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
@@ -135,8 +135,13 @@ public class FilePerBlockStrategy implements ChunkManager {
       throw e;
     }
 
-    ChunkUtils.writeData(channel, chunkFile.getName(), data, offset, len,
-        volume);
+    // if it's not an overwrite, check that the offset matches the block file length
+    if (!overwrite) {
+      ChunkUtils.validateChunkSize(chunkFile, info);
+    }
+
+    ChunkUtils
+        .writeData(channel, chunkFile.getName(), data, offset, len, volume);
 
     containerData.updateWriteStats(len, overwrite);
   }
diff --git 
a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto 
b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 31947db..9f57e14 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -145,6 +145,7 @@ enum Result {
   CONTAINER_MISSING = 40;
   BLOCK_TOKEN_VERIFICATION_FAILED = 41;
   ERROR_IN_DB_SYNC = 42;
+  CHUNK_FILE_INCONSISTENCY = 43;
 }
 
 /**

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to