Jackie-Jiang commented on a change in pull request #7535:
URL: https://github.com/apache/pinot/pull/7535#discussion_r725287957



##########
File path: 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
##########
@@ -48,37 +48,64 @@
  * </pre>
  */
 public final class BitmapInvertedIndexWriter implements Closeable {
+  // 256MB - worst case serialized size of a single bitmap with 
Integer.MAX_VALUE rows
+  private static final long MAX_INITIAL_BUFFER_SIZE = 256 << 20;
+  // 128KB derived from 1M rows (15 containers), worst case 8KB per container 
= 120KB + 8KB extra
+  private static final long PESSIMISTIC_BITMAP_SIZE_ESTIMATE = 128 << 10;
   private final FileChannel _fileChannel;
   private final ByteBuffer _offsetBuffer;
-  private final ByteBuffer _bitmapBuffer;
+  private ByteBuffer _bitmapBuffer;
+  private int _bytesWritten;
 
   public BitmapInvertedIndexWriter(File outputFile, int numBitmaps)
       throws IOException {
+    int sizeForOffsets = (numBitmaps + 1) * Integer.BYTES;
+    long bitmapBufferEstimate = Math.min(PESSIMISTIC_BITMAP_SIZE_ESTIMATE * 
numBitmaps, MAX_INITIAL_BUFFER_SIZE);
     _fileChannel = new RandomAccessFile(outputFile, "rw").getChannel();
-    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 
Integer.MAX_VALUE);
-    _bitmapBuffer = _offsetBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN);
-    _bitmapBuffer.position((numBitmaps + 1) * Integer.BYTES);
+    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 
sizeForOffsets);
+    _bytesWritten = sizeForOffsets;
+    mapBitmapBuffer(bitmapBufferEstimate);
   }
 
   public void add(RoaringBitmap bitmap)
       throws IOException {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+    int length = bitmap.serializedSizeInBytes();
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     bitmap.serialize(_bitmapBuffer);
+    _bytesWritten += length;
   }
 
-  public void add(byte[] bitmapBytes) {
+  public void add(byte[] bitmapBytes)
+      throws IOException {
     add(bitmapBytes, bitmapBytes.length);
   }
 
-  public void add(byte[] bitmapBytes, int length) {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+  public void add(byte[] bitmapBytes, int length)
+      throws IOException {
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     _bitmapBuffer.put(bitmapBytes, 0, length);
+    _bytesWritten += length;
+  }
+
+  private void resizeIfNecessary(int required)
+      throws IOException {
+    if (_bitmapBuffer.capacity() - required < _bitmapBuffer.position()) {
+      mapBitmapBuffer(MAX_INITIAL_BUFFER_SIZE);
+    }
+  }
+
+  private void mapBitmapBuffer(long size)
+      throws IOException {
+    _bitmapBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 
_bytesWritten, size)
+        .order(ByteOrder.LITTLE_ENDIAN);
   }
 
   @Override
   public void close()
       throws IOException {
-    int fileLength = _bitmapBuffer.position();
+    int fileLength = _bytesWritten;

Review comment:
       I think we need to free the bitmap buffer because it is no longer backed 
by the offset buffer

##########
File path: 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
##########
@@ -48,37 +48,64 @@
  * </pre>
  */
 public final class BitmapInvertedIndexWriter implements Closeable {
+  // 256MB - worst case serialized size of a single bitmap with 
Integer.MAX_VALUE rows
+  private static final long MAX_INITIAL_BUFFER_SIZE = 256 << 20;
+  // 128KB derived from 1M rows (15 containers), worst case 8KB per container 
= 120KB + 8KB extra
+  private static final long PESSIMISTIC_BITMAP_SIZE_ESTIMATE = 128 << 10;
   private final FileChannel _fileChannel;
   private final ByteBuffer _offsetBuffer;
-  private final ByteBuffer _bitmapBuffer;
+  private ByteBuffer _bitmapBuffer;
+  private int _bytesWritten;
 
   public BitmapInvertedIndexWriter(File outputFile, int numBitmaps)
       throws IOException {
+    int sizeForOffsets = (numBitmaps + 1) * Integer.BYTES;
+    long bitmapBufferEstimate = Math.min(PESSIMISTIC_BITMAP_SIZE_ESTIMATE * 
numBitmaps, MAX_INITIAL_BUFFER_SIZE);
     _fileChannel = new RandomAccessFile(outputFile, "rw").getChannel();
-    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 
Integer.MAX_VALUE);
-    _bitmapBuffer = _offsetBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN);
-    _bitmapBuffer.position((numBitmaps + 1) * Integer.BYTES);
+    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 
sizeForOffsets);
+    _bytesWritten = sizeForOffsets;
+    mapBitmapBuffer(bitmapBufferEstimate);
   }
 
   public void add(RoaringBitmap bitmap)
       throws IOException {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+    int length = bitmap.serializedSizeInBytes();
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     bitmap.serialize(_bitmapBuffer);
+    _bytesWritten += length;
   }
 
-  public void add(byte[] bitmapBytes) {
+  public void add(byte[] bitmapBytes)
+      throws IOException {
     add(bitmapBytes, bitmapBytes.length);
   }
 
-  public void add(byte[] bitmapBytes, int length) {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+  public void add(byte[] bitmapBytes, int length)
+      throws IOException {
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     _bitmapBuffer.put(bitmapBytes, 0, length);
+    _bytesWritten += length;
+  }
+
+  private void resizeIfNecessary(int required)
+      throws IOException {
+    if (_bitmapBuffer.capacity() - required < _bitmapBuffer.position()) {
+      mapBitmapBuffer(MAX_INITIAL_BUFFER_SIZE);

Review comment:
       Should we free the old bitmap buffer during resize?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to