ivankelly commented on a change in pull request #1678: PIP-17: provide 
BlockAwareSegmentInputStream implementation and test
URL: https://github.com/apache/incubator-pulsar/pull/1678#discussion_r185562881
 
 

 ##########
 File path: 
pulsar-broker/src/main/java/org/apache/pulsar/broker/s3offload/impl/BlockAwareSegmentInputStreamImpl.java
 ##########
 @@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pulsar.broker.s3offload.impl;
+
+import static com.google.common.base.Preconditions.checkState;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Ints;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.CompositeByteBuf;
+import io.netty.buffer.PooledByteBufAllocator;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import org.apache.bookkeeper.client.api.LedgerEntries;
+import org.apache.bookkeeper.client.api.LedgerEntry;
+import org.apache.bookkeeper.client.api.ReadHandle;
+import org.apache.pulsar.broker.s3offload.BlockAwareSegmentInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The BlockAwareSegmentInputStreamImpl for each cold storage data block.
+ * It gets data from the ledger, and reads out the content for a data block.
+ * DataBlockHeader + entries(each with format[[entry_size -- int][entry_id -- 
long][entry_data]]) + padding
+ */
+public class BlockAwareSegmentInputStreamImpl extends InputStream implements 
BlockAwareSegmentInputStream {
+    private static final Logger log = 
LoggerFactory.getLogger(BlockAwareSegmentInputStreamImpl.class);
+
+    private static final byte[] BLOCK_END_PADDING = 
Ints.toByteArray(0xFEDCDEAD);
+
+    private final ReadHandle ledger;
+    private final long startEntryId;
+    private final int blockSize;
+
+    // Number of message entries that have been read from the ledger and
read out from this InputStream.
+    private int blockEntryCount;
+
+    // tracking read status for both header and entries.
+    // Bytes that already been read from this InputStream
+    private int bytesReadOffset = 0;
+    // Byte from this index is all padding byte
+    private int dataBlockFullOffset;
+    private final InputStream dataBlockHeaderStream;
+
+    // how many entries want to read from ReadHandle each time.
+    private static final int ENTRIES_PER_READ = 100;
+    // size of the per-entry header buffer: entry size and entry id.
+    private static final int ENTRY_HEADER_SIZE = 4 /* entry size*/ + 8 /* 
entry id */;
+    // Keep a list of all entries ByteBuf, each ByteBuf contains 2 buf: entry 
header and entry content.
+    private List<ByteBuf> entriesByteBuf = null;
+
+    public BlockAwareSegmentInputStreamImpl(ReadHandle ledger, long 
startEntryId, int blockSize) {
+        this.ledger = ledger;
+        this.startEntryId = startEntryId;
+        this.blockSize = blockSize;
+        this.dataBlockHeaderStream = DataBlockHeaderImpl.of(blockSize, 
startEntryId).toStream();
+        this.blockEntryCount = 0;
+        this.dataBlockFullOffset = blockSize;
+        this.entriesByteBuf = Lists.newLinkedList();
+    }
+
+    // read ledger entries.
+    private int readEntries() throws IOException {
+        checkState(bytesReadOffset >= 
DataBlockHeaderImpl.getDataStartOffset());
+        checkState(bytesReadOffset < blockSize);
+
+        // once reach the end of entry buffer, start a new read.
+        if (bytesReadOffset < dataBlockFullOffset && entriesByteBuf.isEmpty()) 
{
 
 Review comment:
   moving the inner check out of the outer check will remove code duplication
   
   ```
    if (bytesReadOffset < dataBlockFullOffset && entriesByteBuf.isEmpty()) {
        entriesByteBuf = readNextEntriesFromLedger(startEntryId + 
blockEntryCount, ENTRIES_PER_READ);
   }
    if ((!entriesByteBuf.isEmpty()) && bytesReadOffset + 
entriesByteBuf.get(0).readableBytes() > blockSize) {
       // not able to place a new Entry.
      entriesByteBuf.forEach(buf -> buf.release());
      entriesByteBuf.clear();
      dataBlockFullOffset = bytesReadOffset;
   
      return BLOCK_END_PADDING[(bytesReadOffset++ - dataBlockFullOffset) % 4];
   } else if (bytesReadOffset < dataBlockFullOffset) {
       ...
   ```
   
   In fact, the second else if is even unnecessary; else would be enough.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to