richardstartin commented on a change in pull request #6320:
URL: https://github.com/apache/incubator-pinot/pull/6320#discussion_r536911397



##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/OnHeapBitmapInvertedIndexCreator.java
##########
@@ -19,68 +19,85 @@
 package org.apache.pinot.core.segment.creator.impl.inv;
 
 import com.google.common.base.Preconditions;
-import java.io.BufferedOutputStream;
-import java.io.DataOutputStream;
+
+
 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
 import org.apache.commons.io.FileUtils;
 import 
org.apache.pinot.core.segment.creator.DictionaryBasedInvertedIndexCreator;
 import org.apache.pinot.core.segment.creator.impl.V1Constants;
+import org.apache.pinot.core.util.CleanerUtil;
+import org.roaringbitmap.RoaringBitmapWriter;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
 
 /**
  * Implementation of {@link DictionaryBasedInvertedIndexCreator} that uses 
on-heap memory.
  */
 public final class OnHeapBitmapInvertedIndexCreator implements 
DictionaryBasedInvertedIndexCreator {
   private final File _invertedIndexFile;
-  private final MutableRoaringBitmap[] _bitmaps;
+  private final RoaringBitmapWriter<MutableRoaringBitmap>[] _bitmapWriters;
   private int _nextDocId;
 
+  @SuppressWarnings("unchecked")
   public OnHeapBitmapInvertedIndexCreator(File indexDir, String columnName, 
int cardinality) {
     _invertedIndexFile = new File(indexDir, columnName + 
V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
-    _bitmaps = new MutableRoaringBitmap[cardinality];
+    _bitmapWriters = new RoaringBitmapWriter[cardinality];
     for (int i = 0; i < cardinality; i++) {
-      _bitmaps[i] = new MutableRoaringBitmap();
+      _bitmapWriters[i] = RoaringBitmapWriter.bufferWriter().get();
     }
   }
 
   @Override
   public void add(int dictId) {
-    _bitmaps[dictId].add(_nextDocId++);
+    _bitmapWriters[dictId].add(_nextDocId++);
   }
 
   @Override
   public void add(int[] dictIds, int length) {
     for (int i = 0; i < length; i++) {
-      _bitmaps[dictIds[i]].add(_nextDocId);
+      _bitmapWriters[dictIds[i]].add(_nextDocId);
     }
     _nextDocId++;
   }
 
   @Override
   public void seal()
       throws IOException {
-    try (DataOutputStream out = new DataOutputStream(
-        new BufferedOutputStream(new FileOutputStream(_invertedIndexFile)))) {
+    // calculate file size
+    int size = (_bitmapWriters.length + 1) * Integer.BYTES;
+    for (RoaringBitmapWriter<MutableRoaringBitmap> writer : _bitmapWriters) {
+      size += writer.get().serializedSizeInBytes();
+      // Check for int overflow
+      Preconditions.checkState(size > 0, "Inverted index file: %s exceeds 2GB 
limit", _invertedIndexFile);
+    }
+    ByteBuffer buffer = null;
+    try (FileChannel channel = new RandomAccessFile(_invertedIndexFile, 
"rw").getChannel()) {
+      buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 
size).order(LITTLE_ENDIAN);
       // Write bitmap offsets
-      int bitmapOffset = (_bitmaps.length + 1) * Integer.BYTES;
-      out.writeInt(bitmapOffset);
-      for (MutableRoaringBitmap bitmap : _bitmaps) {
-        bitmapOffset += bitmap.serializedSizeInBytes();
-        // Check for int overflow
-        Preconditions.checkState(bitmapOffset > 0, "Inverted index file: %s 
exceeds 2GB limit", _invertedIndexFile);
-        out.writeInt(bitmapOffset);
+      int bitmapOffset = (_bitmapWriters.length + 1) * Integer.BYTES;
+      buffer.putInt(Integer.reverseBytes(bitmapOffset));
+      for (RoaringBitmapWriter<MutableRoaringBitmap> writer : _bitmapWriters) {
+        bitmapOffset += writer.getUnderlying().serializedSizeInBytes();

Review comment:
       This API is a little bit overcomplicated: `getUnderlying()` gets the 
bitmap without flushing, `flush()` only flushes, and `get()` flushes and then 
returns the bitmap. I shouldn't have used `getUnderlying()` here because it's 
confusing; perhaps it should be deprecated.

##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/OnHeapBitmapInvertedIndexCreator.java
##########
@@ -19,68 +19,85 @@
 package org.apache.pinot.core.segment.creator.impl.inv;
 
 import com.google.common.base.Preconditions;
-import java.io.BufferedOutputStream;
-import java.io.DataOutputStream;
+
+
 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
 import org.apache.commons.io.FileUtils;
 import 
org.apache.pinot.core.segment.creator.DictionaryBasedInvertedIndexCreator;
 import org.apache.pinot.core.segment.creator.impl.V1Constants;
+import org.apache.pinot.core.util.CleanerUtil;
+import org.roaringbitmap.RoaringBitmapWriter;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
 
 /**
  * Implementation of {@link DictionaryBasedInvertedIndexCreator} that uses 
on-heap memory.
  */
 public final class OnHeapBitmapInvertedIndexCreator implements 
DictionaryBasedInvertedIndexCreator {
   private final File _invertedIndexFile;
-  private final MutableRoaringBitmap[] _bitmaps;
+  private final RoaringBitmapWriter<MutableRoaringBitmap>[] _bitmapWriters;
   private int _nextDocId;
 
+  @SuppressWarnings("unchecked")
   public OnHeapBitmapInvertedIndexCreator(File indexDir, String columnName, 
int cardinality) {
     _invertedIndexFile = new File(indexDir, columnName + 
V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
-    _bitmaps = new MutableRoaringBitmap[cardinality];
+    _bitmapWriters = new RoaringBitmapWriter[cardinality];
     for (int i = 0; i < cardinality; i++) {
-      _bitmaps[i] = new MutableRoaringBitmap();
+      _bitmapWriters[i] = RoaringBitmapWriter.bufferWriter().get();
     }
   }
 
   @Override
   public void add(int dictId) {
-    _bitmaps[dictId].add(_nextDocId++);
+    _bitmapWriters[dictId].add(_nextDocId++);
   }
 
   @Override
   public void add(int[] dictIds, int length) {
     for (int i = 0; i < length; i++) {
-      _bitmaps[dictIds[i]].add(_nextDocId);
+      _bitmapWriters[dictIds[i]].add(_nextDocId);
     }
     _nextDocId++;
   }
 
   @Override
   public void seal()
       throws IOException {
-    try (DataOutputStream out = new DataOutputStream(
-        new BufferedOutputStream(new FileOutputStream(_invertedIndexFile)))) {
+    // calculate file size
+    int size = (_bitmapWriters.length + 1) * Integer.BYTES;
+    for (RoaringBitmapWriter<MutableRoaringBitmap> writer : _bitmapWriters) {
+      size += writer.get().serializedSizeInBytes();
+      // Check for int overflow
+      Preconditions.checkState(size > 0, "Inverted index file: %s exceeds 2GB 
limit", _invertedIndexFile);
+    }
+    ByteBuffer buffer = null;
+    try (FileChannel channel = new RandomAccessFile(_invertedIndexFile, 
"rw").getChannel()) {
+      buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 
size).order(LITTLE_ENDIAN);

Review comment:
       Yes, that's a good idea.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to