This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 4269bfdeaa allow up to 4GB per bitmap index (#8796)
4269bfdeaa is described below

commit 4269bfdeaa161109f338f5533457c01d19abea1e
Author: Richard Startin <[email protected]>
AuthorDate: Wed Jun 1 19:51:51 2022 +0200

    allow up to 4GB per bitmap index (#8796)
---
 .../creator/impl/inv/BitmapInvertedIndexWriter.java      | 16 +++++++++++-----
 .../segment/index/readers/BitmapInvertedIndexReader.java | 15 +++++++++------
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
index 22fcf5170b..2e13a65c7e 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.segment.creator.impl.inv;
 
+import com.google.common.base.Preconditions;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
@@ -55,7 +56,7 @@ public final class BitmapInvertedIndexWriter implements Closeable {
   private final FileChannel _fileChannel;
   private final ByteBuffer _offsetBuffer;
   private ByteBuffer _bitmapBuffer;
-  private int _bytesWritten;
+  private long _bytesWritten;
 
   public BitmapInvertedIndexWriter(File outputFile, int numBitmaps)
       throws IOException {
@@ -71,7 +72,7 @@ public final class BitmapInvertedIndexWriter implements Closeable {
       throws IOException {
     int length = bitmap.serializedSizeInBytes();
     resizeIfNecessary(length);
-    _offsetBuffer.putInt(_bytesWritten);
+    _offsetBuffer.putInt(asUnsignedInt(_bytesWritten));
     bitmap.serialize(_bitmapBuffer);
     _bytesWritten += length;
   }
@@ -84,7 +85,7 @@ public final class BitmapInvertedIndexWriter implements Closeable {
   public void add(byte[] bitmapBytes, int length)
       throws IOException {
     resizeIfNecessary(length);
-    _offsetBuffer.putInt(_bytesWritten);
+    _offsetBuffer.putInt(asUnsignedInt(_bytesWritten));
     _bitmapBuffer.put(bitmapBytes, 0, length);
     _bytesWritten += length;
   }
@@ -113,8 +114,8 @@ public final class BitmapInvertedIndexWriter implements Closeable {
   @Override
   public void close()
       throws IOException {
-    int fileLength = _bytesWritten;
-    _offsetBuffer.putInt(fileLength);
+    long fileLength = _bytesWritten;
+    _offsetBuffer.putInt(asUnsignedInt(fileLength));
     _fileChannel.truncate(fileLength);
     _fileChannel.close();
     if (CleanerUtil.UNMAP_SUPPORTED) {
@@ -123,4 +124,9 @@ public final class BitmapInvertedIndexWriter implements Closeable {
       cleanBitmapBuffer();
     }
   }
+
+  private int asUnsignedInt(long value) {
+    Preconditions.checkArgument(value >>> 32 == 0, "overflowed 4GB");
+    return (int) (value & 0xFFFFFFFFL);
+  }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitmapInvertedIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitmapInvertedIndexReader.java
index 48905a7d88..af678b0018 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitmapInvertedIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitmapInvertedIndexReader.java
@@ -40,22 +40,25 @@ public class BitmapInvertedIndexReader implements InvertedIndexReader<ImmutableR
   // Use the offset of the first bitmap to support 2 different format of the inverted index:
   //   1. Offset buffer stores the offsets within the whole data buffer (including offset buffer)
   //   2. Offset buffer stores the offsets within the bitmap buffer
-  private final int _firstOffset;
+  private final long _firstOffset;
 
   public BitmapInvertedIndexReader(PinotDataBuffer dataBuffer, int numBitmaps) {
     long offsetBufferEndOffset = (long) (numBitmaps + 1) * Integer.BYTES;
     _offsetBuffer = dataBuffer.view(0, offsetBufferEndOffset, ByteOrder.BIG_ENDIAN);
     _bitmapBuffer = dataBuffer.view(offsetBufferEndOffset, dataBuffer.size());
-
-    _firstOffset = _offsetBuffer.getInt(0);
+    _firstOffset = getOffset(0);
   }
 
   @SuppressWarnings("unchecked")
   @Override
   public ImmutableRoaringBitmap getDocIds(int dictId) {
-    int offset = _offsetBuffer.getInt(dictId * Integer.BYTES);
-    int length = _offsetBuffer.getInt((dictId + 1) * Integer.BYTES) - offset;
-    return new ImmutableRoaringBitmap(_bitmapBuffer.toDirectByteBuffer(offset - _firstOffset, length));
+    long offset = getOffset(dictId);
+    long length = getOffset(dictId + 1) - offset;
+    return new ImmutableRoaringBitmap(_bitmapBuffer.toDirectByteBuffer(offset - _firstOffset, (int) length));
+  }
+
+  private long getOffset(int dictId) {
+    return _offsetBuffer.getInt(dictId * Integer.BYTES) & 0xFFFFFFFFL;
   }
 
   @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to