This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 95b0eda map bitmaps through a bounded window to avoid excessive disk
pressure… (#7535)
95b0eda is described below
commit 95b0eda056d13e97e1588f2db05cc656571d9df1
Author: Richard Startin <[email protected]>
AuthorDate: Mon Oct 11 19:17:46 2021 +0100
map bitmaps through a bounded window to avoid excessive disk pressure…
(#7535)
The assumption behind BitmapInvertedIndexWriter is that there is a lot of
disk space available, and that it's ok to overmap and truncate in order to
avoid syscall overhead, but this results in high disk usage. Allocating file
space for the serialised inverted indexes as it is needed feels like a good
usability tradeoff.
The strategy builds on some knowledge about how the bitmaps get serialised:
a single bitmap never needs more than 256MB, so we never map more than 256MB at
a time. Bitmaps should be fairly sparse most of the time, so we guess that we
need numBitmaps * 128KB but if this estimate proves to be too small, the
pessimistic 256MB is used for the next section.
---
.../impl/inv/BitmapInvertedIndexWriter.java | 54 ++++++--
.../creator/inv/BitmapInvertedIndexWriterTest.java | 144 +++++++++++++++++++++
2 files changed, 189 insertions(+), 9 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
index a00256d..22fcf51 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
@@ -48,43 +48,79 @@ import org.roaringbitmap.RoaringBitmap;
* </pre>
*/
public final class BitmapInvertedIndexWriter implements Closeable {
+ // 264MB - worst case serialized size of a single bitmap with
Integer.MAX_VALUE rows
+ private static final long MAX_INITIAL_BUFFER_SIZE = 256 << 20;
+ // 128KB derived from 1M rows (15 containers), worst case 8KB per container
= 120KB + 8KB extra
+ private static final long PESSIMISTIC_BITMAP_SIZE_ESTIMATE = 128 << 10;
private final FileChannel _fileChannel;
private final ByteBuffer _offsetBuffer;
- private final ByteBuffer _bitmapBuffer;
+ private ByteBuffer _bitmapBuffer;
+ private int _bytesWritten;
public BitmapInvertedIndexWriter(File outputFile, int numBitmaps)
throws IOException {
+ int sizeForOffsets = (numBitmaps + 1) * Integer.BYTES;
+ long bitmapBufferEstimate = Math.min(PESSIMISTIC_BITMAP_SIZE_ESTIMATE *
numBitmaps, MAX_INITIAL_BUFFER_SIZE);
_fileChannel = new RandomAccessFile(outputFile, "rw").getChannel();
- _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0,
Integer.MAX_VALUE);
- _bitmapBuffer = _offsetBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN);
- _bitmapBuffer.position((numBitmaps + 1) * Integer.BYTES);
+ _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0,
sizeForOffsets);
+ _bytesWritten = sizeForOffsets;
+ mapBitmapBuffer(bitmapBufferEstimate);
}
public void add(RoaringBitmap bitmap)
throws IOException {
- _offsetBuffer.putInt(_bitmapBuffer.position());
+ int length = bitmap.serializedSizeInBytes();
+ resizeIfNecessary(length);
+ _offsetBuffer.putInt(_bytesWritten);
bitmap.serialize(_bitmapBuffer);
+ _bytesWritten += length;
}
- public void add(byte[] bitmapBytes) {
+ public void add(byte[] bitmapBytes)
+ throws IOException {
add(bitmapBytes, bitmapBytes.length);
}
- public void add(byte[] bitmapBytes, int length) {
- _offsetBuffer.putInt(_bitmapBuffer.position());
+ public void add(byte[] bitmapBytes, int length)
+ throws IOException {
+ resizeIfNecessary(length);
+ _offsetBuffer.putInt(_bytesWritten);
_bitmapBuffer.put(bitmapBytes, 0, length);
+ _bytesWritten += length;
+ }
+
+ private void resizeIfNecessary(int required)
+ throws IOException {
+ if (_bitmapBuffer.capacity() - required < _bitmapBuffer.position()) {
+ mapBitmapBuffer(Math.max(MAX_INITIAL_BUFFER_SIZE, required));
+ }
+ }
+
+ private void mapBitmapBuffer(long size)
+ throws IOException {
+ cleanBitmapBuffer();
+ _bitmapBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE,
_bytesWritten, size)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ }
+
+ private void cleanBitmapBuffer()
+ throws IOException {
+ if (_bitmapBuffer != null && CleanerUtil.UNMAP_SUPPORTED) {
+ CleanerUtil.getCleaner().freeBuffer(_bitmapBuffer);
+ }
}
@Override
public void close()
throws IOException {
- int fileLength = _bitmapBuffer.position();
+ int fileLength = _bytesWritten;
_offsetBuffer.putInt(fileLength);
_fileChannel.truncate(fileLength);
_fileChannel.close();
if (CleanerUtil.UNMAP_SUPPORTED) {
CleanerUtil.BufferCleaner cleaner = CleanerUtil.getCleaner();
cleaner.freeBuffer(_offsetBuffer);
+ cleanBitmapBuffer();
}
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/inv/BitmapInvertedIndexWriterTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/inv/BitmapInvertedIndexWriterTest.java
new file mode 100644
index 0000000..63842e5
--- /dev/null
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/inv/BitmapInvertedIndexWriterTest.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.creator.inv;
+
+import com.google.common.collect.Collections2;
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.commons.io.FileUtils;
+import
org.apache.pinot.segment.local.segment.creator.impl.inv.BitmapInvertedIndexWriter;
+import
org.apache.pinot.segment.local.segment.index.readers.BitmapInvertedIndexReader;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.RoaringBitmapWriter;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class BitmapInvertedIndexWriterTest {
+
+ private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(),
"BitmapInvertedIndexWriterTest");
+
+ private RoaringBitmap[] _bitmaps;
+
+ @BeforeClass
+ public void setUp()
+ throws IOException {
+ FileUtils.forceMkdir(INDEX_DIR);
+ RoaringBitmap huge = huge();
+ RoaringBitmap small = small();
+ RoaringBitmap empty = empty();
+ _bitmaps = new RoaringBitmap[] { huge, small, empty };
+ }
+
+ @AfterClass
+ public void tearDown()
+ throws IOException {
+ FileUtils.forceDelete(INDEX_DIR);
+ _bitmaps = null;
+ }
+
+ private File _file;
+
+ @BeforeTest
+ public void before() {
+ _file = new File(INDEX_DIR, UUID.randomUUID().toString());
+ }
+
+ @AfterTest
+ public void after() {
+ FileUtils.deleteQuietly(_file);
+ }
+
+ @DataProvider(name = "bitmaps")
+ public Object[][] bitmaps() {
+ // don't return bitmaps directly because TestNG will create huge test names
+ Collection<List<Integer>> permutations = Collections2.permutations(
+ IntStream.range(0, _bitmaps.length +
1).boxed().collect(Collectors.toList())
+ );
+ Object[][] testCases = new Object[permutations.size()][];
+ int i = 0;
+ for (List<Integer> permutation : permutations) {
+ int[] ints = new int[permutation.size()];
+ int j = 0;
+ for (Integer boxed : permutation) {
+ ints[j++] = boxed % _bitmaps.length;
+ }
+ testCases[i++] = new Object[] { ints };
+ }
+ return testCases;
+ }
+
+ @Test(dataProvider = "bitmaps", testName = "test write bitmaps with
permutation = ")
+ public void testWriteBitmaps(int[] indices)
+ throws IOException {
+ // indirection because TestNG will create huge test names otherwise
+ RoaringBitmap[] bitmaps = new RoaringBitmap[indices.length];
+ int i = 0;
+ for (int index : indices) {
+ bitmaps[i++] = _bitmaps[index];
+ }
+ try (BitmapInvertedIndexWriter writer = new
BitmapInvertedIndexWriter(_file, bitmaps.length)) {
+ for (RoaringBitmap bitmap : bitmaps) {
+ writer.add(bitmap);
+ }
+ }
+ verifyReadable(bitmaps);
+ }
+
+ private void verifyReadable(RoaringBitmap[] bitmaps)
+ throws IOException {
+ try (PinotDataBuffer buffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(_file);
+ BitmapInvertedIndexReader reader = new
BitmapInvertedIndexReader(buffer, bitmaps.length)) {
+ int dictId = 0;
+ for (RoaringBitmap bitmap : bitmaps) {
+ ImmutableRoaringBitmap persisted = reader.getDocIds(dictId++);
+ assertEquals(bitmap.getCardinality(), persisted.getCardinality());
+ }
+ }
+ }
+
+ private static RoaringBitmap empty() {
+ return new RoaringBitmap();
+ }
+
+ private static RoaringBitmap small() {
+ return RoaringBitmap.bitmapOf(1, 10, 100, 1000, 10000, 100000, 1000000,
10000000);
+ }
+
+ private static RoaringBitmap huge() {
+ RoaringBitmapWriter<RoaringBitmap> bitmap =
RoaringBitmapWriter.writer().constantMemory().get();
+ for (int i = 0; i < Integer.MAX_VALUE - 1; i += 2) {
+ bitmap.add(i);
+ }
+ return bitmap.get();
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]