chungen0126 commented on code in PR #6613:
URL: https://github.com/apache/ozone/pull/6613#discussion_r1792381652


##########
hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamBlockInput.java:
##########
@@ -0,0 +1,777 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.storage;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.hadoop.fs.ByteBufferReadable;
+import org.apache.hadoop.fs.CanUnbuffer;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.hdds.client.BlockID;
+import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadChunkResponseProto;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadBlockResponseProto;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.OzoneClientConfig;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
+import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
+import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
+import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.ozone.common.Checksum;
+import org.apache.hadoop.ozone.common.ChecksumData;
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
+import org.apache.hadoop.ozone.common.utils.BufferUtils;
+import org.apache.hadoop.security.token.Token;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.apache.ratis.thirdparty.io.grpc.Status;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+
+import static org.apache.hadoop.hdds.client.ReplicationConfig.getLegacyFactor;
+
+/**
+ * An {@link java.io.InputStream} called from KeyInputStream to read a block
+ * from the container.
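+ *
+ * <p>A rough usage sketch, assuming the caller (e.g. KeyInputStream) supplies
+ * a resolved {@code pipeline}, a block {@code token} and an
+ * {@link OzoneClientConfig}; parameter order follows the constructor below:
+ * <pre>{@code
+ *   StreamBlockInput input = new StreamBlockInput(blockID, length, pipeline,
+ *       token, xceiverClientFactory, refreshFunction, config);
+ *   byte[] buf = new byte[4096];
+ *   int n = input.read(buf, 0, buf.length);
+ *   input.close();
+ * }</pre>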
+ */
+public class StreamBlockInput extends BlockExtendedInputStream
+    implements Seekable, CanUnbuffer, ByteBufferReadable {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(StreamBlockInput.class);
+  private final BlockID blockID;
+  private final long length;
+  private final AtomicReference<Pipeline> pipelineRef =
+      new AtomicReference<>();
+  private final AtomicReference<Token<OzoneBlockTokenIdentifier>> tokenRef =
+      new AtomicReference<>();
+  private XceiverClientFactory xceiverClientFactory;
+  private XceiverClientSpi xceiverClient;
+
+  private List<Long> bufferOffsets;
+  private int bufferIndex;
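+  // Position within the block; -1 means the current position is derived
+  // from the buffers instead (see getPos()).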
+  private long blockPosition = -1;
+  private List<ByteBuffer> buffers;
+  private boolean allocated = false;
+  private long bufferOffsetWrtBlockData;
+  private long buffersSize;
+  private static final int EOF = -1;
+  private final List<XceiverClientSpi.Validator> validators;
+  private final boolean verifyChecksum;
+  private final Function<BlockID, BlockLocationInfo> refreshFunction;
+  private final RetryPolicy retryPolicy;
+  private int retries;
+
+  public StreamBlockInput(
+      BlockID blockID, long length, Pipeline pipeline,
+      Token<OzoneBlockTokenIdentifier> token,
+      XceiverClientFactory xceiverClientFactory,
+      Function<BlockID, BlockLocationInfo> refreshFunction,
+      OzoneClientConfig config) throws IOException {
+    this.blockID = blockID;
+    LOG.debug("Initializing StreamBlockInput for block {}", blockID);
+    this.length = length;
+    setPipeline(pipeline);
+    tokenRef.set(token);
+    this.xceiverClientFactory = xceiverClientFactory;
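+    // Each response is checked by validateBlock before its data is used.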
+    this.validators = ContainerProtocolCalls.toValidatorList(
+        (request, response) -> validateBlock(response));
+    this.verifyChecksum = config.isChecksumVerify();
+    this.refreshFunction = refreshFunction;
+    this.retryPolicy =
+        HddsClientUtils.createRetryPolicy(config.getMaxReadRetryCount(),
+            TimeUnit.SECONDS.toMillis(config.getReadRetryInterval()));
+  }
+
+  public BlockID getBlockID() {
+    return blockID;
+  }
+
+  public long getLength() {
+    return length;
+  }
+
+  @Override
+  public synchronized long getPos() {
+    if (length == 0) {
+      return 0;
+    }
+    if (blockPosition >= 0) {
+      return blockPosition;
+    }
+
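+    // The buffers have been exhausted and no data remains in the block,
+    // so the position is at EOF, i.e. the block length.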
+    if (allocated && !buffersHaveData() && !dataRemainingInBlock()) {
+      Preconditions.checkState(
+          bufferOffsetWrtBlockData + buffersSize == length,
+          "EOF detected but not at the last byte of the block");
+      return length;
+    }
+    if (buffersHaveData()) {
+      // BufferOffset w.r.t. BlockData + BufferOffset w.r.t. buffers +
+      // Position of current Buffer
+      return bufferOffsetWrtBlockData + bufferOffsets.get(bufferIndex) +
+          buffers.get(bufferIndex).position();
+    }
+    if (buffersAllocated()) {
+      return bufferOffsetWrtBlockData + buffersSize;
+    }
+    return 0;
+  }
+
+  @Override
+  public synchronized int read() throws IOException {
+    checkOpen();
+
+    int dataOut = EOF;
+    int len = 1;
+    int available;
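+    // Loop until one byte has been read or EOF is hit, retrying reads that
+    // fail with retriable or connectivity errors.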
+    while (len > 0) {
+      try {
+        acquireClient();
+        available = prepareRead(1);
+        retries = 0;
+      } catch (SCMSecurityException ex) {
+        throw ex;
+      } catch (StorageContainerException e) {
+        if (shouldRetryRead(e)) {
+          releaseClient();
+          refreshBlockInfo(e);
+          continue;
+        } else {
+          throw e;
+        }
+      } catch (IOException ioe) {
+        if (shouldRetryRead(ioe)) {
+          if (isConnectivityIssue(ioe)) {
+            releaseClient();
+            refreshBlockInfo(ioe);
+          } else {
+            releaseClient();
+          }
+          continue;
+        } else {
+          throw ioe;
+        }
+      }
+      if (available == EOF) {
+        // There is no more data in the block stream. The buffers should have
+        // been released by now.
+        Preconditions.checkState(buffers == null);
+      } else {
+        dataOut = Byte.toUnsignedInt(buffers.get(bufferIndex).get());
+      }
+
+      len -= available;
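+      // Release the current buffer once it has been fully consumed.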
+      if (bufferEOF()) {
+        releaseBuffers(bufferIndex);
+      }
+    }
+
+    return dataOut;
+  }
+
+  @Override
+  public synchronized int read(byte[] b, int off, int len) throws IOException {
+    // According to the JavaDocs for InputStream, it is recommended that
+    // subclasses provide an override of bulk read if possible for performance
+    // reasons.  In addition to performance, we need to do it for correctness
+    // reasons.  The Ozone REST service uses PipedInputStream and
+    // PipedOutputStream to relay HTTP response data between a Jersey thread
+    // and a Netty thread.  It turns out that PipedInputStream/PipedOutputStream
+    // have a subtle dependency (bug?) on the wrapped stream providing separate
+    // implementations of single-byte read and bulk read.  Without this, get
+    // key responses might close the connection before writing all of the bytes
+    // responses might close the connection before writing all of the bytes
+    // advertised in the Content-Length.
+    if (b == null) {
+      throw new NullPointerException();
+    }
+    if (off < 0 || len < 0 || len > b.length - off) {
+      throw new IndexOutOfBoundsException();
+    }
+    if (len == 0) {
+      return 0;
+    }
+    int total = 0;
+    int available;
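+    // Copy into b until len bytes are read or EOF, with the same retry
+    // handling as the single-byte read() above.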
+    while (len > 0) {
+      try {
+        acquireClient();
+        available = prepareRead(len);
+        retries = 0;
+      } catch (SCMSecurityException ex) {
+        throw ex;
+      } catch (StorageContainerException e) {
+        if (shouldRetryRead(e)) {
+          releaseClient();
+          refreshBlockInfo(e);
+          continue;
+        } else {
+          throw e;
+        }
+      } catch (IOException ioe) {
+        if (shouldRetryRead(ioe)) {
+          if (isConnectivityIssue(ioe)) {
+            releaseClient();
+            refreshBlockInfo(ioe);
+          } else {
+            releaseClient();
+          }
+          continue;
+        } else {
+          throw ioe;
+        }
+      }
+      if (available == EOF) {
+        // There is no more data in the block stream. The buffers should have
+        // been released by now
+        Preconditions.checkState(buffers == null);
+        return total != 0 ? total : EOF;
+      }
+      buffers.get(bufferIndex).get(b, off + total, available);
+      len -= available;
+      total += available;
+
+      if (bufferEOF()) {
+        releaseBuffers(bufferIndex);
+      }
+    }
+    return total;
+  }
+
+  @Override
+  public synchronized void close() throws IOException {
+    releaseClient();
+    releaseBuffers();
+    xceiverClientFactory = null;
+  }
+
+  @Override
+  public synchronized int read(ByteBuffer byteBuffer) throws IOException {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

