[ https://issues.apache.org/jira/browse/HDDS-1496?focusedWorklogId=253263&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-253263 ]

ASF GitHub Bot logged work on HDDS-1496:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 03/Jun/19 18:32
            Start Date: 03/Jun/19 18:32
    Worklog Time Spent: 10m 
      Work Description: bharatviswa504 commented on pull request #804: HDDS-1496. Support partial chunk reads and checksum verification
URL: https://github.com/apache/hadoop/pull/804#discussion_r289978511
 
 

 ##########
 File path: hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockInputStream.java
 ##########
 @@ -1,140 +1,145 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership.  The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
 package org.apache.hadoop.hdds.scm.storage;
 
+import com.google.common.primitives.Bytes;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.client.ContainerBlockID;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
-    .ChecksumData;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
-    .ChecksumType;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
 import org.apache.hadoop.hdds.scm.XceiverClientManager;
-import org.apache.hadoop.hdds.scm.XceiverClientSpi;
-import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
+import org.apache.hadoop.ozone.common.Checksum;
+import org.apache.hadoop.security.token.Token;
+import org.bouncycastle.crypto.prng.RandomGenerator;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
 import java.io.EOFException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
-import java.util.UUID;
 
-/**
- * Tests {@link BlockInputStream}.
- */
+import static org.apache.hadoop.hdds.scm.storage.TestChunkInputStream.generateRandomData;
+
 public class TestBlockInputStream {
 
-  private static BlockInputStream blockInputStream;
-  private static List<ChunkInfo> chunks;
-  private static int blockSize;
+  private static final int BLOCK_MAX_SIZE = 500;
+  private static final int CHUNK_SIZE = 100;
+  private static final BlockID BLOCK_ID = new BlockID(1, 1);
+  private static final Random RANDOM = new Random();
+  private static Checksum checksum;
 
-  private static final int CHUNK_SIZE = 20;
+  private BlockInputStream blockStream;
+  private byte[] blockData;
+  private int blockSize;
+  private List<ChunkInfo> chunks;
+  private Map<String, byte[]> chunkDataMap;
 
   @Before
   public void setup() throws Exception {
     BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
-    chunks = createChunkList(10);
-    String traceID = UUID.randomUUID().toString();
-    blockInputStream = new DummyBlockInputStream(blockID, null, null, chunks,
-        traceID, false, 0);
-
-    blockSize = 0;
-    for (ChunkInfo chunk : chunks) {
-      blockSize += chunk.getLen();
-    }
+    checksum = new Checksum(ChecksumType.NONE, CHUNK_SIZE);
+    createChunkList(5);
+
+    blockStream = new DummyBlockInputStream(blockID, blockSize, null, 0,
+        null, false, null, null);
   }
 
   /**
    * Create a mock list of chunks. The first n-1 chunks of length CHUNK_SIZE
    * and the last chunk with length CHUNK_SIZE/2.
-   * @param numChunks
-   * @return
    */
-  private static List<ChunkInfo> createChunkList(int numChunks) {
-    ChecksumData dummyChecksumData = ChecksumData.newBuilder()
-        .setType(ChecksumType.NONE)
-        .setBytesPerChecksum(100)
-        .build();
-    List<ChunkInfo> chunkList = new ArrayList<>(numChunks);
-    int i;
-    for (i = 0; i < numChunks - 1; i++) {
-      String chunkName = "chunk-" + i;
+  private void createChunkList(int numChunks)
+      throws Exception {
+
+    chunks = new ArrayList<>(numChunks);
+    chunkDataMap = new HashMap<>();
+    blockData = new byte[0];
+    int i, chunkLen;
+    byte[] byteData;
+    String chunkName;
+
+    for (i = 0; i < numChunks; i++) {
+      chunkName = "chunk-" + i;
+      chunkLen = CHUNK_SIZE;
+      if (i == numChunks - 1) {
+        chunkLen = CHUNK_SIZE / 2;
+      }
+      byteData = generateRandomData(chunkLen);
       ChunkInfo chunkInfo = ChunkInfo.newBuilder()
           .setChunkName(chunkName)
           .setOffset(0)
-          .setLen(CHUNK_SIZE)
-          .setChecksumData(dummyChecksumData)
+          .setLen(chunkLen)
+          .setChecksumData(checksum.computeChecksum(
+              byteData, 0, chunkLen).getProtoBufMessage())
           .build();
-      chunkList.add(chunkInfo);
+
+      chunkDataMap.put(chunkName, byteData);
+      chunks.add(chunkInfo);
+
+      blockSize += chunkLen;
+      blockData = Bytes.concat(blockData, byteData);
     }
-    ChunkInfo chunkInfo = ChunkInfo.newBuilder()
-        .setChunkName("chunk-" + i)
-        .setOffset(0)
-        .setLen(CHUNK_SIZE/2)
-        .setChecksumData(dummyChecksumData)
-        .build();
-    chunkList.add(chunkInfo);
-
-    return chunkList;
   }
 
   /**
-   * A dummy BlockInputStream to test the functionality of BlockInputStream.
+   * A dummy BlockInputStream to mock read block call to DN.
    */
-  private static class DummyBlockInputStream extends BlockInputStream {
+  private class DummyBlockInputStream extends BlockInputStream {
 
-    DummyBlockInputStream(BlockID blockID,
-        XceiverClientManager xceiverClientManager,
-        XceiverClientSpi xceiverClient,
-        List<ChunkInfo> chunks,
-        String traceID,
+    DummyBlockInputStream(BlockID blockId,
+        long blockLen,
+        Pipeline pipeline,
+        long containerKey,
+        Token<OzoneBlockTokenIdentifier> token,
         boolean verifyChecksum,
-        long initialPosition) throws IOException {
-      super(blockID, xceiverClientManager, xceiverClient, chunks, traceID,
-          verifyChecksum, initialPosition);
+        String traceId,
+        XceiverClientManager xceiverClientManager) {
+      super(blockId, blockLen, pipeline, containerKey, token, verifyChecksum,
+          traceId, xceiverClientManager);
     }
 
     @Override
-    protected ByteString readChunk(final ChunkInfo chunkInfo,
-        List<DatanodeDetails> excludeDns, List<DatanodeDetails> dnListFromReply)
-        throws IOException {
-      return getByteString(chunkInfo.getChunkName(), (int) chunkInfo.getLen());
+    protected List<ChunkInfo> getChunkInfos() {
+      return chunks;
     }
 
     @Override
-    protected List<DatanodeDetails> getDatanodeList() {
-      // return an empty dummy list of size 10
-      return new ArrayList<>(10);
+    protected void addStream(ChunkInfo chunkInfo) {
+      TestChunkInputStream testChunkInputStream = new TestChunkInputStream();
+      chunkStreams.add(testChunkInputStream.new DummyChunkInputStream(
+          chunkInfo, null, null, null, false,
+          chunkDataMap.get(chunkInfo.getChunkName()).clone()));
     }
 
-    /**
-     * Create ByteString with the input data to return when a readChunk call is
-     * placed.
-     */
-    private static ByteString getByteString(String data, int length) {
-      while (data.length() < length) {
-        data = data + "0";
-      }
-      return ByteString.copyFrom(data.getBytes(), 0, length);
+    @Override
+    protected synchronized void checkOpen() throws IOException {
+      // No action needed
+    }
+  }
+
+  private void seekAndVerify(int pos) throws Exception {
+    blockStream.seek(pos);
+    Assert.assertEquals("Current position of buffer does not match with the " +
+        "seeked position", pos, blockStream.getPos());
+  }
+
+  /**
+   * Match readData with the chunkData byte-wise.
+   * @param readData Data read through ChunkInputStream
+   * @param inputDataStartIndex first index (inclusive) in chunkData to compare
+   *                            with read data
+   * @param inputDataEndIndex last index (exclusive) in chunkData to compare
 
 Review comment:
   Minor: this is not the last index, it is the length from inputDataStartIndex.
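
   A sketch of the corrected javadoc and helper this comment points at (the
   rename of the second parameter to "length", and the method body, are
   assumptions based on the visible diff, not the committed code):

     /**
      * Match readData with blockData byte-wise.
      * @param readData Data read through BlockInputStream
      * @param inputDataStartIndex first index (inclusive) in blockData to
      *                            compare with read data
      * @param length number of bytes of blockData, starting at
      *               inputDataStartIndex, to compare with read data
      */
     private void matchWithInputData(byte[] readData,
         int inputDataStartIndex, int length) {
       // Illustrative sketch only; the actual helper in the PR may differ.
       for (int i = 0; i < length; i++) {
         Assert.assertEquals(blockData[inputDataStartIndex + i], readData[i]);
       }
     }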
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 253263)
    Time Spent: 6h  (was: 5h 50m)

> Support partial chunk reads and checksum verification
> -----------------------------------------------------
>
>                 Key: HDDS-1496
>                 URL: https://issues.apache.org/jira/browse/HDDS-1496
>             Project: Hadoop Distributed Data Store
>          Issue Type: Improvement
>            Reporter: Hanisha Koneru
>            Assignee: Hanisha Koneru
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 6h
>  Remaining Estimate: 0h
>
> BlockInputStream#readChunkFromContainer() reads the whole chunk from disk 
> even if we need to read only a part of the chunk.
> This Jira aims to improve readChunkFromContainer so that only the part of 
> the chunk file needed by the client is read, plus the part of the chunk 
> file required to verify the checksum.
> For example, let's say the client is reading from index 120 to 450 in the 
> chunk, and a checksum is stored for every 100 bytes in the chunk, i.e. the 
> first checksum is for bytes at index 0 to 99, the next for bytes at index 
> 100 to 199, and so on. To verify bytes 120 to 450, we would need to read 
> bytes 100 to 499 so that checksum verification can be done.
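> A minimal sketch of that boundary computation (illustrative only; the 
> method name and signature are assumptions, not the actual Ozone code):
>
>   // Expand a requested range [readStart, readEnd] (both inclusive) to
>   // checksum-block boundaries so the covering checksums can be verified.
>   static long[] alignToChecksumBoundary(long readStart, long readEnd,
>       long bytesPerChecksum) {
>     long alignedStart = (readStart / bytesPerChecksum) * bytesPerChecksum;
>     long alignedEndExclusive =
>         (readEnd / bytesPerChecksum + 1) * bytesPerChecksum;
>     return new long[] {alignedStart, alignedEndExclusive};
>   }
>
>   // alignToChecksumBoundary(120, 450, 100) returns {100, 500}: bytes 100
>   // to 499 must be read, matching the example above.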



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
