This is an automated email from the ASF dual-hosted git repository.
kishoreg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new af90f79 Improved range queries (#7513)
af90f79 is described below
commit af90f7972802f03b211c2c22becd8a838c46854b
Author: Richard Startin <[email protected]>
AuthorDate: Tue Oct 5 17:08:40 2021 +0100
Improved range queries (#7513)
---
LICENSE-binary | 4 +--
.../org/apache/pinot/perf/BenchmarkRangeIndex.java | 15 +++++----
.../index/column/PhysicalColumnIndexContainer.java | 2 +-
.../index/readers/BitSlicedRangeIndexReader.java | 39 ++++++++++++++++------
.../index/creator/BitSlicedIndexCreatorTest.java | 16 ++++++---
pom.xml | 2 +-
6 files changed, 54 insertions(+), 24 deletions(-)
diff --git a/LICENSE-binary b/LICENSE-binary
index 08ee41e..c8c9569 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -379,8 +379,8 @@ org.eclipse.jetty:jetty-util:9.4.39.v20210325
org.javassist:javassist:3.19.0-GA
org.lz4:lz4-java:1.7.1
org.quartz-scheduler:quartz:2.3.2
-org.roaringbitmap:RoaringBitmap:0.9.21
-org.roaringbitmap:shims:0.9.21
+org.roaringbitmap:RoaringBitmap:0.9.22
+org.roaringbitmap:shims:0.9.22
org.webjars:swagger-ui:3.18.2
org.wildfly.openssl:wildfly-openssl:1.0.7.Final
org.xerial.larray:larray-buffer:0.4.1
diff --git
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
index ae02607..25079ca 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
@@ -196,7 +196,7 @@ public class BenchmarkRangeIndex {
@TearDown(Level.Trial)
public void tearDown()
throws IOException {
- FileUtils.forceDelete(_indexDir);
+ FileUtils.deleteQuietly(_indexDir);
}
protected Comparable<?> max() {
@@ -378,19 +378,22 @@ public class BenchmarkRangeIndex {
public void setup()
throws IOException {
super.setup();
- _reader = new BitSlicedRangeIndexReader(_buffer);
+ _reader = new BitSlicedRangeIndexReader(_buffer, metadata());
}
- @Override
- protected RawValueBasedInvertedIndexCreator newCreator() {
- ColumnMetadata metadata = new ColumnMetadataImpl.Builder()
+ private ColumnMetadata metadata() {
+ return new ColumnMetadataImpl.Builder()
.setFieldSpec(_fieldSpec)
.setTotalDocs(_numDocs)
.setHasDictionary(false)
.setMaxValue(max())
.setMinValue(min())
.build();
- return new BitSlicedRangeIndexCreator(_indexDir, metadata);
+ }
+
+ @Override
+ protected RawValueBasedInvertedIndexCreator newCreator() {
+ return new BitSlicedRangeIndexCreator(_indexDir, metadata());
}
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
index 49c7e2f..9ab1e7d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
@@ -183,7 +183,7 @@ public final class PhysicalColumnIndexContainer implements
ColumnIndexContainer
if (version == RangeIndexCreator.VERSION) {
_rangeIndex = new RangeIndexReaderImpl(buffer);
} else if (version == BitSlicedRangeIndexCreator.VERSION) {
- _rangeIndex = new BitSlicedRangeIndexReader(buffer);
+ _rangeIndex = new BitSlicedRangeIndexReader(buffer, metadata);
} else {
LOGGER.warn("Unknown range index version: {}, skip loading range
index for column: {}", version,
metadata.getColumnName());
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
index 032b4c3..41d34e1 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
@@ -23,11 +23,13 @@ import java.nio.ByteBuffer;
import javax.annotation.Nullable;
import
org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator;
import org.apache.pinot.segment.local.utils.FPOrdering;
+import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.index.reader.RangeIndexReader;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.roaringbitmap.RangeBitmap;
import org.roaringbitmap.RoaringBitmap;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
public class BitSlicedRangeIndexReader implements
RangeIndexReader<ImmutableRoaringBitmap> {
@@ -35,8 +37,11 @@ public class BitSlicedRangeIndexReader implements
RangeIndexReader<ImmutableRoar
private final PinotDataBuffer _dataBuffer;
private final long _offset;
private final long _min;
+ private final long _max;
+ private final long _numDocs;
- public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer) {
+ public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer,
+ ColumnMetadata metadata) {
_dataBuffer = dataBuffer;
long offset = 0;
int version = dataBuffer.getInt(offset);
@@ -45,30 +50,34 @@ public class BitSlicedRangeIndexReader implements
RangeIndexReader<ImmutableRoar
_min = dataBuffer.getLong(offset);
offset += Long.BYTES;
_offset = offset;
+ _max = metadata.hasDictionary()
+ ? metadata.getCardinality() - 1
+ : ((Number) metadata.getMaxValue()).longValue();
+ _numDocs = metadata.getTotalDocs();
}
@Nullable
@Override
public ImmutableRoaringBitmap getMatchingDocIds(int min, int max) {
- return queryRangeBitmap(Math.max(min, _min) - _min, max - _min);
+ return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max -
_min);
}
@Nullable
@Override
public ImmutableRoaringBitmap getMatchingDocIds(long min, long max) {
- return queryRangeBitmap(Math.max(min, _min) - _min, max - _min);
+ return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max -
_min);
}
@Nullable
@Override
public ImmutableRoaringBitmap getMatchingDocIds(float min, float max) {
- return queryRangeBitmap(FPOrdering.ordinalOf(min),
FPOrdering.ordinalOf(max));
+ return queryRangeBitmap(FPOrdering.ordinalOf(min),
FPOrdering.ordinalOf(max), 0xFFFFFFFFL);
}
@Nullable
@Override
public ImmutableRoaringBitmap getMatchingDocIds(double min, double max) {
- return queryRangeBitmap(FPOrdering.ordinalOf(min),
FPOrdering.ordinalOf(max));
+ return queryRangeBitmap(FPOrdering.ordinalOf(min),
FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL);
}
// this index supports exact matches, so always return null for partial
matches
@@ -97,12 +106,22 @@ public class BitSlicedRangeIndexReader implements
RangeIndexReader<ImmutableRoar
return null;
}
- private ImmutableRoaringBitmap queryRangeBitmap(long min, long max) {
+ private ImmutableRoaringBitmap queryRangeBitmap(long min, long max, long
columnMax) {
RangeBitmap rangeBitmap = mapRangeBitmap();
- RoaringBitmap lte = rangeBitmap.lte(max);
- RoaringBitmap gte = rangeBitmap.gte(min);
- lte.and(gte);
- return lte.toMutableRoaringBitmap();
+ if (Long.compareUnsigned(max, columnMax) < 0) {
+ RoaringBitmap lte = rangeBitmap.lte(max);
+ if (Long.compareUnsigned(min, 0) > 0) {
+ return rangeBitmap.gte(min, lte).toMutableRoaringBitmap();
+ }
+ return lte.toMutableRoaringBitmap();
+ } else {
+ if (Long.compareUnsigned(min, 0) > 0) {
+ return rangeBitmap.gte(min).toMutableRoaringBitmap();
+ }
+ MutableRoaringBitmap all = new MutableRoaringBitmap();
+ all.add(0, _numDocs);
+ return all;
+ }
}
private RangeBitmap mapRangeBitmap() {
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
index e0a3d52..c06ebee 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
@@ -161,7 +161,7 @@ public class BitSlicedIndexCreatorTest {
}
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() +
BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
- BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer);
+ BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer, metadata);
int prev = Integer.MIN_VALUE;
for (int quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -171,6 +171,8 @@ public class BitSlicedIndexCreatorTest {
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1,
Integer.MAX_VALUE);
assertTrue(result != null && result.isEmpty());
+ assertEquals(reader.getMatchingDocIds(Integer.MIN_VALUE,
Integer.MAX_VALUE),
+ dataset.scan(Integer.MIN_VALUE, Integer.MAX_VALUE));
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
@@ -187,7 +189,7 @@ public class BitSlicedIndexCreatorTest {
}
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() +
BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
- BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer);
+ BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer, metadata);
long prev = Long.MIN_VALUE;
for (long quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -197,6 +199,8 @@ public class BitSlicedIndexCreatorTest {
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1,
Long.MAX_VALUE);
assertTrue(result != null && result.isEmpty());
+ assertEquals(reader.getMatchingDocIds(Long.MIN_VALUE, Long.MAX_VALUE),
+ dataset.scan(Long.MIN_VALUE, Long.MAX_VALUE));
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
@@ -213,7 +217,7 @@ public class BitSlicedIndexCreatorTest {
}
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() +
BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
- BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer);
+ BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer, metadata);
float prev = Float.NEGATIVE_INFINITY;
for (float quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -223,6 +227,8 @@ public class BitSlicedIndexCreatorTest {
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1,
Float.POSITIVE_INFINITY);
assertTrue(result != null && result.isEmpty());
+ assertEquals(reader.getMatchingDocIds(Float.NEGATIVE_INFINITY,
Float.POSITIVE_INFINITY),
+ dataset.scan(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
@@ -239,7 +245,7 @@ public class BitSlicedIndexCreatorTest {
}
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() +
BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
- BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer);
+ BitSlicedRangeIndexReader reader = new
BitSlicedRangeIndexReader(dataBuffer, metadata);
double prev = Double.NEGATIVE_INFINITY;
for (double quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -249,6 +255,8 @@ public class BitSlicedIndexCreatorTest {
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1,
Double.POSITIVE_INFINITY);
assertTrue(result != null && result.isEmpty());
+ assertEquals(reader.getMatchingDocIds(Double.NEGATIVE_INFINITY,
Double.POSITIVE_INFINITY),
+ dataset.scan(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
diff --git a/pom.xml b/pom.xml
index 539243e..168ec2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -436,7 +436,7 @@
<dependency>
<groupId>org.roaringbitmap</groupId>
<artifactId>RoaringBitmap</artifactId>
- <version>0.9.21</version>
+ <version>0.9.22</version>
</dependency>
<dependency>
<groupId>com.101tec</groupId>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]