This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 69aac6c8dd Direct UTF-8 access for "in" filters. (#12517)
69aac6c8dd is described below
commit 69aac6c8dd5588ed19f0cf90f11025317133df57
Author: Gian Merlino <[email protected]>
AuthorDate: Fri May 20 01:51:28 2022 -0700
Direct UTF-8 access for "in" filters. (#12517)
* Direct UTF-8 access for "in" filters.
Directly related:
1) InDimFilter: Store stored Strings (in ValuesSet) plus sorted UTF-8
ByteBuffers (in valuesUtf8). Use valuesUtf8 whenever possible. If
necessary, the input set is copied into a ValuesSet. Much logic is
simplified, because we always know what type the values set will be.
I think that there won't even be an efficiency loss in most cases.
InDimFilter is most frequently created by deserialization, and this
patch updates the JsonCreator constructor to deserialize
directly into a ValuesSet.
2) Add Utf8ValueSetIndex, which InDimFilter uses to avoid UTF-8 decodes
during index lookups.
3) Add unsigned comparator to ByteBufferUtils and use it in
GenericIndexed.BYTE_BUFFER_STRATEGY. This is important because UTF-8
bytes can be compared as bytes if, and only if, the comparison
is unsigned.
4) Add specialization to GenericIndexed.singleThreaded().indexOf that
avoids needless ByteBuffer allocations.
5) Clarify that objects returned by ColumnIndexSupplier.as are not
thread-safe. DictionaryEncodedStringIndexSupplier now calls
singleThreaded() on all relevant GenericIndexed objects, saving
a ByteBuffer allocation per access.
Also:
1) Fix performance regression in LikeFilter: since #12315, it applied
the suffix matcher to all values in range even for type MATCH_ALL.
2) Add ObjectStrategy.canCompare() method. This fixes LikeFilterBenchmark,
which was broken due to calls to strategy.compare in
GenericIndexed.fromIterable.
* Add like-filter implementation tests.
* Add in-filter implementation tests.
* Add tests, fix issues.
* Fix style.
* Adjustments from review.
---
.../druid/benchmark/BoundFilterBenchmark.java | 11 +-
.../DimensionPredicateFilterBenchmark.java | 21 ++-
...FilterBenchmark.java => InFilterBenchmark.java} | 131 ++++++----------
.../druid/benchmark/LikeFilterBenchmark.java | 13 +-
.../druid/java/util/common/ByteBufferUtils.java | 79 ++++++++++
.../apache/druid/java/util/common/StringUtils.java | 15 ++
.../apache/druid/common/utils/StringUtilsTest.java | 11 ++
.../java/util/common/ByteBufferUtilsTest.java | 52 +++++++
.../main/java/org/apache/druid/query/Druids.java | 2 +-
.../org/apache/druid/query/filter/InDimFilter.java | 169 +++++++++++----------
.../apache/druid/query/filter/LikeDimFilter.java | 5 +-
.../druid/query/filter/SelectorDimFilter.java | 5 +-
.../apache/druid/query/topn/TopNQueryBuilder.java | 6 +-
.../java/org/apache/druid/segment/IndexIO.java | 1 +
.../druid/segment/column/ColumnIndexSupplier.java | 2 +
.../segment/column/LexicographicalRangeIndex.java | 16 +-
.../druid/segment/column/StringValueSetIndex.java | 7 +-
...ngValueSetIndex.java => Utf8ValueSetIndex.java} | 19 +--
.../segment/data/ConciseBitmapSerdeFactory.java | 7 +
.../DecompressingByteBufferObjectStrategy.java | 6 +
.../apache/druid/segment/data/GenericIndexed.java | 97 +++++++++---
.../druid/segment/data/GenericIndexedWriter.java | 6 +
.../apache/druid/segment/data/ObjectStrategy.java | 8 +
.../segment/data/RoaringBitmapSerdeFactory.java | 6 +
.../apache/druid/segment/filter/LikeFilter.java | 7 +-
.../org/apache/druid/segment/join/Joinable.java | 21 ++-
.../segment/join/table/IndexedTableJoinable.java | 16 +-
.../serde/DictionaryEncodedColumnPartSerde.java | 1 +
.../DictionaryEncodedStringIndexSupplier.java | 135 ++++++++++++----
.../segment/virtual/ListFilteredVirtualColumn.java | 15 +-
.../apache/druid/query/filter/InDimFilterTest.java | 73 ++++++++-
.../druid/query/filter/LikeDimFilterTest.java | 57 +++++++
.../segment/filter/ExtractionDimFilterTest.java | 6 +
.../apache/druid/segment/filter/InFilterTest.java | 2 +-
.../druid/segment/join/JoinFilterAnalyzerTest.java | 54 +++----
.../builtin/ArrayOverlapOperatorConversion.java | 4 +-
.../calcite/filtration/ConvertSelectorsToIns.java | 4 +-
37 files changed, 771 insertions(+), 319 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
index eecb15ed04..cdb3cf2f7c 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
@@ -27,6 +27,7 @@ import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.extendedset.intset.ConciseSetUtils;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.ordering.StringComparators;
@@ -48,6 +49,7 @@ import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -160,7 +162,7 @@ public class BoundFilterBenchmark
final BitmapSerdeFactory serdeFactory = new
RoaringBitmapSerdeFactory(null);
final List<Integer> ints = generateInts();
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
- FluentIterable.from(ints).transform(i -> i.toString()),
+ FluentIterable.from(ints).transform(Object::toString),
GenericIndexed.STRING_STRATEGY
);
final GenericIndexed<ImmutableBitmap> bitmaps =
GenericIndexed.fromIterable(
@@ -174,9 +176,14 @@ public class BoundFilterBenchmark
),
serdeFactory.getObjectStrategy()
);
+ final GenericIndexed<ByteBuffer> dictionaryUtf8 =
GenericIndexed.fromIterable(
+ FluentIterable.from(ints)
+ .transform(i ->
ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
+ GenericIndexed.BYTE_BUFFER_STRATEGY
+ );
selector = new MockColumnIndexSelector(
bitmapFactory,
- new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
bitmaps, null)
+ new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
dictionaryUtf8, bitmaps, null)
);
}
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
index 09e13bd79d..f01b09a79f 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
@@ -19,7 +19,6 @@
package org.apache.druid.benchmark;
-import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
@@ -28,6 +27,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.filter.DruidDoublePredicate;
import org.apache.druid.query.filter.DruidFloatPredicate;
@@ -51,6 +51,7 @@ import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -123,18 +124,14 @@ public class DimensionPredicateFilterBenchmark
final List<Integer> ints = generateInts();
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
FluentIterable.from(ints)
- .transform(
- new Function<Integer, String>()
- {
- @Override
- public String apply(Integer i)
- {
- return i.toString();
- }
- }
- ),
+ .transform(Object::toString),
GenericIndexed.STRING_STRATEGY
);
+ final GenericIndexed<ByteBuffer> dictionaryUtf8 =
GenericIndexed.fromIterable(
+ FluentIterable.from(ints)
+ .transform(i ->
ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
+ GenericIndexed.BYTE_BUFFER_STRATEGY
+ );
final GenericIndexed<ImmutableBitmap> bitmaps =
GenericIndexed.fromIterable(
FluentIterable.from(ints)
.transform(
@@ -148,7 +145,7 @@ public class DimensionPredicateFilterBenchmark
);
selector = new MockColumnIndexSelector(
bitmapFactory,
- new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
bitmaps, null)
+ new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
dictionaryUtf8, bitmaps, null)
);
}
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
similarity index 50%
copy from
benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
copy to
benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
index 09e13bd79d..9556be4cb9 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
@@ -19,24 +19,18 @@
package org.apache.druid.benchmark;
-import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.ColumnIndexSelector;
-import org.apache.druid.query.filter.DruidDoublePredicate;
-import org.apache.druid.query.filter.DruidFloatPredicate;
-import org.apache.druid.query.filter.DruidLongPredicate;
-import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
-import org.apache.druid.segment.filter.DimensionPredicateFilter;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier;
import org.openjdk.jmh.annotations.Benchmark;
@@ -50,69 +44,38 @@ import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
-import java.util.ArrayList;
-import java.util.List;
+import java.nio.ByteBuffer;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
-public class DimensionPredicateFilterBenchmark
+public class InFilterBenchmark
{
static {
NullHandling.initializeForTests();
}
- private static final int START_INT = 1_000_000_000;
+ private static final int START_INT = 10_000_000;
- private static final DimensionPredicateFilter IS_EVEN = new
DimensionPredicateFilter(
- "foo",
- new DruidPredicateFactory()
- {
- @Override
- public Predicate<String> makeStringPredicate()
- {
- return new Predicate<String>()
- {
- @Override
- public boolean apply(String input)
- {
- if (input == null) {
- return false;
- }
- return Integer.parseInt(input) % 2 == 0;
- }
- };
- }
+ private InDimFilter inFilter;
- @Override
- public DruidLongPredicate makeLongPredicate()
- {
- return DruidLongPredicate.ALWAYS_FALSE;
- }
+ // cardinality of the dictionary. it will contain this many ints (as
strings, of course), starting at START_INT,
+ // even numbers only.
+ @Param({"1000000"})
+ int dictionarySize;
- @Override
- public DruidFloatPredicate makeFloatPredicate()
- {
- return DruidFloatPredicate.ALWAYS_FALSE;
- }
+ // cardinality of the "in" filter. half of its values will be in the
dictionary, half will not.
+ @Param({"10000"})
+ int filterSize;
- @Override
- public DruidDoublePredicate makeDoublePredicate()
- {
- return DruidDoublePredicate.ALWAYS_FALSE;
- }
- },
- null
- );
-
- // cardinality, the dictionary will contain integers starting from START_INT
- @Param({"1000", "100000", "1000000"})
- int cardinality;
-
- // selector will contain a cardinality number of bitmaps; each one contains
a single int: 0
+ // selector will contain a "dictionarySize" number of bitmaps; each one
contains a single int.
+ // this benchmark is not about bitmap union speed, so no need for that part
to be realistic.
ColumnIndexSelector selector;
@Setup
@@ -120,55 +83,51 @@ public class DimensionPredicateFilterBenchmark
{
final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
final BitmapSerdeFactory serdeFactory = new
RoaringBitmapSerdeFactory(null);
- final List<Integer> ints = generateInts();
+ final Iterable<Integer> ints = intGenerator();
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
FluentIterable.from(ints)
- .transform(
- new Function<Integer, String>()
- {
- @Override
- public String apply(Integer i)
- {
- return i.toString();
- }
- }
- ),
+ .transform(Object::toString),
GenericIndexed.STRING_STRATEGY
);
- final GenericIndexed<ImmutableBitmap> bitmaps =
GenericIndexed.fromIterable(
+ final GenericIndexed<ByteBuffer> dictionaryUtf8 =
GenericIndexed.fromIterable(
FluentIterable.from(ints)
- .transform(
- i -> {
- final MutableBitmap mutableBitmap =
bitmapFactory.makeEmptyMutableBitmap();
- mutableBitmap.add(i - START_INT);
- return
bitmapFactory.makeImmutableBitmap(mutableBitmap);
- }
- ),
+ .transform(i ->
ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
+ GenericIndexed.BYTE_BUFFER_STRATEGY
+ );
+ final GenericIndexed<ImmutableBitmap> bitmaps =
GenericIndexed.fromIterable(
+ () -> IntStream.range(0, dictionarySize)
+ .mapToObj(
+ i -> {
+ final MutableBitmap mutableBitmap =
bitmapFactory.makeEmptyMutableBitmap();
+ mutableBitmap.add(i);
+ return
bitmapFactory.makeImmutableBitmap(mutableBitmap);
+ }
+ )
+ .iterator(),
serdeFactory.getObjectStrategy()
);
selector = new MockColumnIndexSelector(
bitmapFactory,
- new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
bitmaps, null)
+ new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
dictionaryUtf8, bitmaps, null)
+ );
+ inFilter = new InDimFilter(
+ "dummy",
+ IntStream.range(START_INT, START_INT +
filterSize).mapToObj(String::valueOf).collect(Collectors.toSet())
);
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
- public void matchIsEven()
+ public void doFilter(Blackhole blackhole)
{
- final ImmutableBitmap bitmapIndex =
Filters.computeDefaultBitmapResults(IS_EVEN, selector);
- Preconditions.checkState(bitmapIndex.size() == cardinality / 2);
+ final ImmutableBitmap bitmapIndex =
Filters.computeDefaultBitmapResults(inFilter, selector);
+ blackhole.consume(bitmapIndex);
}
- private List<Integer> generateInts()
+ private Iterable<Integer> intGenerator()
{
- final List<Integer> ints = new ArrayList<>(cardinality);
-
- for (int i = 0; i < cardinality; i++) {
- ints.add(START_INT + i);
- }
-
- return ints;
+ // i * 2 => half of these values will be present in the inFilter, half
won't.
+ return () -> IntStream.range(0, dictionarySize).map(i -> START_INT + i *
2).boxed().iterator();
}
}
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
index d644eb5f2f..1369d69787 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
@@ -25,6 +25,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.filter.Filter;
@@ -50,6 +51,7 @@ import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -122,11 +124,14 @@ public class LikeFilterBenchmark
final List<Integer> ints = generateInts();
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
FluentIterable.from(ints)
- .transform(
- i -> i.toString()
- ),
+ .transform(Object::toString),
GenericIndexed.STRING_STRATEGY
);
+ final GenericIndexed<ByteBuffer> dictionaryUtf8 =
GenericIndexed.fromIterable(
+ FluentIterable.from(ints)
+ .transform(i ->
ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
+ GenericIndexed.BYTE_BUFFER_STRATEGY
+ );
final GenericIndexed<ImmutableBitmap> bitmaps =
GenericIndexed.fromIterable(
FluentIterable.from(ints)
.transform(
@@ -140,7 +145,7 @@ public class LikeFilterBenchmark
);
selector = new MockColumnIndexSelector(
bitmapFactory,
- new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
bitmaps, null)
+ new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary,
dictionaryUtf8, bitmaps, null)
);
}
diff --git
a/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
b/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
index 29c46dc69a..388112378e 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
@@ -23,12 +23,14 @@ import org.apache.druid.collections.ResourceHolder;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.utils.JvmUtils;
+import javax.annotation.Nullable;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
+import java.util.Comparator;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -48,6 +50,8 @@ public class ByteBufferUtils
// null if unmap is supported
private static final RuntimeException UNMAP_NOT_SUPPORTED_EXCEPTION;
+ private static final Comparator<ByteBuffer> COMPARATOR_UNSIGNED = new
UnsignedByteBufferComparator();
+
static {
Object unmap = null;
RuntimeException exception = null;
@@ -211,4 +215,79 @@ public class ByteBufferUtils
{
free(buffer);
}
+
+ /**
+ * Compares two ByteBuffer ranges using unsigned byte ordering.
+ *
+ * Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
+ */
+ public static int compareByteBuffers(
+ final ByteBuffer buf1,
+ final int position1,
+ final int length1,
+ final ByteBuffer buf2,
+ final int position2,
+ final int length2
+ )
+ {
+ final int commonLength = Math.min(length1, length2);
+
+ for (int i = 0; i < commonLength; i++) {
+ final byte byte1 = buf1.get(position1 + i);
+ final byte byte2 = buf2.get(position2 + i);
+ final int cmp = (byte1 & 0xFF) - (byte2 & 0xFF); // Unsigned comparison
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+
+ return Integer.compare(length1, length2);
+ }
+
+ /**
+ * Compares two ByteBuffers from their positions to their limits using
unsigned byte ordering. Accepts null
+ * buffers, which are ordered earlier than any nonnull buffer.
+ *
+ * Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
+ */
+ public static int compareByteBuffers(
+ @Nullable final ByteBuffer buf1,
+ @Nullable final ByteBuffer buf2
+ )
+ {
+ if (buf1 == null) {
+ return buf2 == null ? 0 : -1;
+ }
+
+ if (buf2 == null) {
+ return 1;
+ }
+
+ return ByteBufferUtils.compareByteBuffers(
+ buf1,
+ buf1.position(),
+ buf1.remaining(),
+ buf2,
+ buf2.position(),
+ buf2.remaining()
+ );
+ }
+
+ /**
+ * Comparator that compares two {@link ByteBuffer} using unsigned ordering.
Null buffers are accepted, and
+ * are ordered earlier than any nonnull buffer.
+ */
+ public static Comparator<ByteBuffer> unsignedComparator()
+ {
+ return COMPARATOR_UNSIGNED;
+ }
+
+ private static class UnsignedByteBufferComparator implements
Comparator<ByteBuffer>
+ {
+ @Override
+ public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
+ {
+ return ByteBufferUtils.compareByteBuffers(o1, o2);
+ }
+ }
}
diff --git
a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
index b076f98bcf..fd74e48d9a 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
@@ -108,6 +108,11 @@ public class StringUtils
return StringUtils.fromUtf8(buffer, buffer.remaining());
}
+ /**
+ * Converts a string to a UTF-8 byte array.
+ *
+ * @throws NullPointerException if "string" is null
+ */
public static byte[] toUtf8(final String string)
{
try {
@@ -119,6 +124,16 @@ public class StringUtils
}
}
+ /**
+ * Converts a string to UTF-8 bytes, returning them as a newly-allocated
on-heap {@link ByteBuffer}.
+ * If "string" is null, returns null.
+ */
+ @Nullable
+ public static ByteBuffer toUtf8ByteBuffer(@Nullable final String string)
+ {
+ return string == null ? null : ByteBuffer.wrap(toUtf8(string));
+ }
+
/**
* Encodes "string" into the buffer "byteBuffer", using no more than the
number of bytes remaining in the buffer.
* Will only encode whole characters. The byteBuffer's position and limit
may be changed during operation, but will
diff --git
a/core/src/test/java/org/apache/druid/common/utils/StringUtilsTest.java
b/core/src/test/java/org/apache/druid/common/utils/StringUtilsTest.java
index 1ee6643e2f..787d5daa79 100644
--- a/core/src/test/java/org/apache/druid/common/utils/StringUtilsTest.java
+++ b/core/src/test/java/org/apache/druid/common/utils/StringUtilsTest.java
@@ -23,7 +23,10 @@ import org.apache.druid.java.util.common.StringUtils;
import org.junit.Assert;
import org.junit.Test;
+import java.nio.ByteBuffer;
+
/**
+ *
*/
public class StringUtilsTest
{
@@ -76,4 +79,12 @@ public class StringUtilsTest
Assert.assertEquals(4, StringUtils.estimatedBinaryLengthAsUTF8(invalid));
}
+ @Test
+ public void testToUtf8ByteBuffer()
+ {
+ Assert.assertNull(StringUtils.toUtf8ByteBuffer(null));
+ Assert.assertEquals(ByteBuffer.allocate(0),
StringUtils.toUtf8ByteBuffer(""));
+ Assert.assertEquals(ByteBuffer.wrap(StringUtils.toUtf8("foo")),
StringUtils.toUtf8ByteBuffer("foo"));
+ Assert.assertEquals(ByteBuffer.wrap(StringUtils.toUtf8("🙂")),
StringUtils.toUtf8ByteBuffer("🙂"));
+ }
}
diff --git
a/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
b/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
index 91a37dba42..f5acb6e030 100644
---
a/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
+++
b/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
@@ -21,6 +21,8 @@ package org.apache.druid.java.util.common;
import com.google.common.io.Files;
import org.apache.druid.collections.ResourceHolder;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
@@ -33,6 +35,7 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.util.Arrays;
+import java.util.Comparator;
public class ByteBufferUtilsTest
{
@@ -76,4 +79,53 @@ public class ByteBufferUtilsTest
final ByteBuffer heapBuffer = ByteBuffer.allocate(4096);
ByteBufferUtils.free(heapBuffer);
}
+
+ @Test
+ @SuppressWarnings("EqualsWithItself")
+ public void testUnsignedComparator()
+ {
+ final Comparator<ByteBuffer> comparator =
ByteBufferUtils.unsignedComparator();
+
+ // Tests involving null
+ MatcherAssert.assertThat(comparator.compare(null, null),
Matchers.equalTo(0));
+ MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.allocate(0)),
Matchers.lessThan(0));
+ MatcherAssert.assertThat(comparator.compare(ByteBuffer.allocate(0), null),
Matchers.greaterThan(0));
+ MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.allocate(1)),
Matchers.lessThan(0));
+ MatcherAssert.assertThat(comparator.compare(ByteBuffer.allocate(1), null),
Matchers.greaterThan(0));
+ MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.wrap(new
byte[]{-1})), Matchers.lessThan(0));
+ MatcherAssert.assertThat(comparator.compare(ByteBuffer.wrap(new
byte[]{-1}), null), Matchers.greaterThan(0));
+
+ // Tests involving buffers of different lengths
+ MatcherAssert.assertThat(
+ comparator.compare(
+ ByteBuffer.wrap(new byte[]{1, 2, 3}),
+ ByteBuffer.wrap(new byte[]{1, 2, 3, 4})
+ ),
+ Matchers.lessThan(0)
+ );
+
+ MatcherAssert.assertThat(
+ comparator.compare(
+ ByteBuffer.wrap(new byte[]{1, 2, 3, 4}),
+ ByteBuffer.wrap(new byte[]{1, 2, 3})
+ ),
+ Matchers.greaterThan(0)
+ );
+
+ // Tests involving the full range of bytes
+ for (byte i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) {
+ for (byte j = Byte.MIN_VALUE; j < Byte.MAX_VALUE; j++) {
+ final int cmp = Integer.compare(Byte.toUnsignedInt(i),
Byte.toUnsignedInt(j));
+
+ MatcherAssert.assertThat(
+ StringUtils.format("comparison of %s to %s",
Byte.toUnsignedInt(i), Byte.toUnsignedInt(j)),
+ comparator.compare(
+ ByteBuffer.wrap(new byte[]{i}),
+ ByteBuffer.wrap(new byte[]{j})
+ ),
+ cmp < 0 ? Matchers.lessThan(0) : cmp > 0 ? Matchers.greaterThan(0)
: Matchers.equalTo(0)
+ );
+ }
+ }
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/query/Druids.java
b/processing/src/main/java/org/apache/druid/query/Druids.java
index 6b7739ab66..a760d1745d 100644
--- a/processing/src/main/java/org/apache/druid/query/Druids.java
+++ b/processing/src/main/java/org/apache/druid/query/Druids.java
@@ -210,7 +210,7 @@ public class Druids
{
final Set<String> filterValues = Sets.newHashSet(values);
filterValues.add(value);
- dimFilter = new InDimFilter(dimensionName, filterValues, null, null);
+ dimFilter = new InDimFilter(dimensionName, filterValues);
return this;
}
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
index 2c363dc4ad..74270b65c5 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
@@ -29,11 +29,11 @@ import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
+import com.google.common.collect.ForwardingSortedSet;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
-import com.google.common.collect.Sets;
import com.google.common.collect.TreeRangeSet;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
@@ -42,6 +42,7 @@ import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.ByteBufferUtils;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.guava.Comparators;
@@ -59,25 +60,26 @@ import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.Utf8ValueSetIndex;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
+import java.util.TreeSet;
public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
{
- // Values can contain `null` object
- private final Set<String> values;
+ // Values can contain `null` object. Values are sorted (nulls-first).
+ private final ValuesSet values;
+ // Computed eagerly, not lazily, because lazy computations would block all
processing threads for a given query.
+ private final SortedSet<ByteBuffer> valuesUtf8;
private final String dimension;
@Nullable
private final ExtractionFn extractionFn;
@@ -103,7 +105,7 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
@JsonProperty("dimension") String dimension,
// This 'values' collection instance can be reused if possible to avoid
copying a big collection.
// Callers should _not_ modify the collection after it is passed to this
constructor.
- @JsonProperty("values") Set<String> values,
+ @JsonProperty("values") ValuesSet values,
@JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
)
@@ -121,32 +123,35 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
* Creates a new filter without an extraction function or any special filter
tuning.
*
* @param dimension column to search
- * @param values set of values to match. This collection may be reused to
avoid copying a big collection.
- * Therefore, callers should <b>not</b> modify the
collection after it is passed to this
- * constructor.
+ * @param values set of values to match. If this collection is a {@link
SortedSet}, it may be reused to avoid
+ * copying a big collection. Therefore, callers should
<b>not</b> modify the collection after it
+ * is passed to this constructor.
*/
- public InDimFilter(
- String dimension,
- Set<String> values
- )
+ public InDimFilter(String dimension, Set<String> values)
{
this(
dimension,
- values,
+ values instanceof ValuesSet ? (ValuesSet) values : new
ValuesSet(values),
+ null,
null,
null
);
}
/**
- * This constructor should be called only in unit tests since accepting a
Collection makes copying more likely.
+ * Creates a new filter without an extraction function or any special filter
tuning.
+ *
+ * @param dimension column to search
+ * @param values set of values to match. If this collection is a
{@link SortedSet}, it may be reused to avoid
+ * copying a big collection. Therefore, callers should
<b>not</b> modify the collection after it
+ * is passed to this constructor.
+ * @param extractionFn extraction function to apply to the column before
checking against "values"
*/
- @VisibleForTesting
public InDimFilter(String dimension, Collection<String> values, @Nullable
ExtractionFn extractionFn)
{
this(
dimension,
- values instanceof Set ? (Set<String>) values : new HashSet<>(values),
+ values instanceof ValuesSet ? (ValuesSet) values : new
ValuesSet(values),
extractionFn,
null,
null
@@ -158,7 +163,7 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
*/
private InDimFilter(
final String dimension,
- final Set<String> values,
+ final ValuesSet values,
@Nullable final ExtractionFn extractionFn,
@Nullable final FilterTuning filterTuning,
@Nullable final DruidPredicateFactory predicateFactory
@@ -166,19 +171,15 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
{
Preconditions.checkNotNull(values, "values cannot be null");
- // The values set can be huge. Try to avoid copying the set if possible.
- // Note that we may still need to copy values to a list for caching. See
getCacheKey().
+ this.values = values;
+
if (!NullHandling.sqlCompatible() && values.contains("")) {
- // In Non sql compatible mode, empty strings should be converted to
nulls for the filter.
- // In sql compatible mode, empty strings and nulls should be treated
differently
- this.values = Sets.newHashSetWithExpectedSize(values.size());
- for (String v : values) {
- this.values.add(NullHandling.emptyToNullIfNeeded(v));
- }
- } else {
- this.values = values;
+ // In non-SQL-compatible mode, empty strings must be converted to nulls
for the filter.
+ this.values.remove("");
+ this.values.add(null);
}
+ this.valuesUtf8 = this.values.toUtf8();
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot
be null");
this.extractionFn = extractionFn;
this.filterTuning = filterTuning;
@@ -199,7 +200,7 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
}
@JsonProperty
- public Set<String> getValues()
+ public SortedSet<String> getValues()
{
return values;
}
@@ -296,9 +297,15 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
selector
);
}
- final StringValueSetIndex valueSetIndex =
indexSupplier.as(StringValueSetIndex.class);
- if (valueSetIndex != null) {
- return valueSetIndex.forValues(values);
+
+ final Utf8ValueSetIndex utf8ValueSetIndex =
indexSupplier.as(Utf8ValueSetIndex.class);
+ if (utf8ValueSetIndex != null) {
+ return utf8ValueSetIndex.forSortedValuesUtf8(valuesUtf8);
+ }
+
+ final StringValueSetIndex stringValueSetIndex =
indexSupplier.as(StringValueSetIndex.class);
+ if (stringValueSetIndex != null) {
+ return stringValueSetIndex.forSortedValues(values);
}
}
return Filters.makePredicateIndex(
@@ -399,20 +406,9 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
private byte[] computeCacheKey()
{
- final Collection<String> sortedValues;
-
- if (values instanceof SortedSet && isNaturalOrder(((SortedSet<String>)
values).comparator())) {
- // Avoid copying "values" when it is already in the order we need for
cache key computation.
- sortedValues = values;
- } else {
- final List<String> sortedValuesList = new ArrayList<>(values);
- sortedValuesList.sort(Comparators.naturalNullsFirst());
- sortedValues = sortedValuesList;
- }
-
// Hash all values, in sorted order, as their length followed by their
content.
final Hasher hasher = Hashing.sha256().newHasher();
- for (String v : sortedValues) {
+ for (String v : values) {
if (v == null) {
// Encode null as length -1, no content.
hasher.putInt(-1);
@@ -438,7 +434,7 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
LookupExtractor lookup = exFn.getLookup();
- final Set<String> keys = new HashSet<>();
+ final ValuesSet keys = new ValuesSet();
for (String value : values) {
// We cannot do an unapply()-based optimization if the selector value
@@ -468,40 +464,11 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
return this;
}
- /**
- * Returns true if the comparator is null or the singleton {@link
Comparators#naturalNullsFirst()}. Useful for
- * detecting if a sorted set is in natural order or not.
- *
- * May return false negatives (i.e. there are naturally-ordered comparators
that will return false here).
- */
- private static <T> boolean isNaturalOrder(@Nullable final Comparator<T>
comparator)
- {
- return comparator == null ||
Comparators.naturalNullsFirst().equals(comparator);
- }
-
@SuppressWarnings("ReturnValueIgnored")
private static Predicate<String> createStringPredicate(final Set<String>
values)
{
Preconditions.checkNotNull(values, "values");
-
- try {
- // Check to see if values.contains(null) will throw a
NullPointerException. Jackson JSON deserialization won't
- // lead to this (it will create a HashSet, which can accept nulls). But
when InDimFilters are created
- // programmatically as a result of optimizations like rewriting inner
joins as filters, the passed-in Set may
- // not be able to accept nulls. We don't want to copy the Sets (since
they may be large) so instead we'll wrap
- // it in a null-checking lambda if needed.
- values.contains(null);
-
- // Safe to do values.contains(null).
- return values::contains;
- }
- catch (NullPointerException ignored) {
- // Fall through
- }
-
- // Not safe to do values.contains(null); must return a wrapper.
- // Return false for null, since an exception means the set cannot accept
null (and therefore does not include it).
- return value -> value != null && values.contains(value);
+ return values::contains;
}
private static DruidLongPredicate createLongPredicate(final Set<String>
values)
@@ -558,7 +525,7 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
InFilterDruidPredicateFactory(
final ExtractionFn extractionFn,
- final Set<String> values
+ final ValuesSet values
)
{
this.extractionFn = extractionFn;
@@ -638,4 +605,50 @@ public class InDimFilter extends
AbstractOptimizableDimFilter implements Filter
return Objects.hash(extractionFn, values);
}
}
+
+ public static class ValuesSet extends ForwardingSortedSet<String>
+ {
+ private final SortedSet<String> values;
+
+ public ValuesSet()
+ {
+ this.values = new TreeSet<>(Comparators.naturalNullsFirst());
+ }
+
+ /**
+ * Create a ValuesSet from another Collection. The Collection will be
reused if it is a {@link SortedSet} with
+ * an appropriate comparator.
+ */
+ public ValuesSet(final Collection<String> values)
+ {
+ if (values instanceof SortedSet && Comparators.naturalNullsFirst()
+
.equals(((SortedSet<String>) values).comparator())) {
+ this.values = (SortedSet<String>) values;
+ } else {
+ this.values = new TreeSet<>(Comparators.naturalNullsFirst());
+ this.values.addAll(values);
+ }
+ }
+
+ public SortedSet<ByteBuffer> toUtf8()
+ {
+ final TreeSet<ByteBuffer> valuesUtf8 = new
TreeSet<>(ByteBufferUtils.unsignedComparator());
+
+ for (final String value : values) {
+ if (value == null) {
+ valuesUtf8.add(null);
+ } else {
+ valuesUtf8.add(ByteBuffer.wrap(StringUtils.toUtf8(value)));
+ }
+ }
+
+ return valuesUtf8;
+ }
+
+ @Override
+ protected SortedSet<String> delegate()
+ {
+ return values;
+ }
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
index 50a06b2392..f878edc9ce 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
@@ -294,9 +294,8 @@ public class LikeDimFilter extends
AbstractOptimizableDimFilter implements DimFi
}
/**
- * Checks if the suffix of strings.get(i) matches the suffix of this
matcher. The first prefix.length characters
- * of s are ignored. This method is useful if you've already independently
verified the prefix. This method
- * evalutes strings.get(i) lazily to save time when it isn't necessary to
actually look at the string.
+ * Checks if the suffix of "value" matches the suffix of this matcher. The
first prefix.length() characters
+ * of "value" are ignored. This method is useful if you've already
independently verified the prefix.
*/
public boolean matchesSuffixOnly(@Nullable String value)
{
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java
b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java
index c717dbead3..340943a084 100644
---
a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java
+++
b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java
@@ -34,7 +34,6 @@ import
org.apache.druid.segment.filter.DimensionPredicateFilter;
import org.apache.druid.segment.filter.SelectorFilter;
import javax.annotation.Nullable;
-import java.util.Collections;
import java.util.Objects;
import java.util.Set;
@@ -96,7 +95,9 @@ public class SelectorDimFilter extends
AbstractOptimizableDimFilter implements D
@Override
public DimFilter optimize()
{
- return new InDimFilter(dimension, Collections.singleton(value),
extractionFn, filterTuning).optimize();
+ final InDimFilter.ValuesSet valuesSet = new InDimFilter.ValuesSet();
+ valuesSet.add(value);
+ return new InDimFilter(dimension, valuesSet, extractionFn,
filterTuning).optimize();
}
@Override
diff --git
a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java
b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java
index 6699085a1e..437f9fab10 100644
--- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java
+++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java
@@ -20,9 +20,9 @@
package org.apache.druid.query.topn;
import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.TableDataSource;
@@ -45,6 +45,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import java.util.UUID;
/**
@@ -234,7 +235,8 @@ public class TopNQueryBuilder
public TopNQueryBuilder filters(String dimensionName, String value,
String... values)
{
- final Set<String> filterValues = Sets.newHashSet(values);
+ final Set<String> filterValues = new
TreeSet<>(Comparators.naturalNullsFirst());
+ filterValues.addAll(Arrays.asList(values));
filterValues.add(value);
dimFilter = new InDimFilter(dimensionName, filterValues);
return this;
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexIO.java
b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
index 6c5b1e8235..6068e8c748 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
@@ -460,6 +460,7 @@ public class IndexIO
new DictionaryEncodedStringIndexSupplier(
new ConciseBitmapFactory(),
index.getDimValueLookup(dimension),
+ index.getDimValueUtf8Lookup(dimension),
bitmaps,
spatialIndex
),
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java
index f5ec1688b4..7e2b21a25d 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java
@@ -39,6 +39,8 @@ public interface ColumnIndexSupplier
* {@link org.apache.druid.segment.data.Offset} to form the basis of a
{@link org.apache.druid.segment.Cursor}
* (or {@link org.apache.druid.segment.vector.VectorOffset} and {@link
org.apache.druid.segment.vector.VectorCursor})
* which can greatly reduce the total number of rows which need to be
scanned and processed.
+ *
+ * Objects returned by this method are not thread-safe.
*/
@Nullable
<T> T as(Class<T> clazz);
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java
b/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java
index d506ebc1c1..9568f671a4 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java
@@ -30,21 +30,21 @@ import javax.annotation.Nullable;
public interface LexicographicalRangeIndex
{
/**
- * Get an {@link BitmapColumnIndex} corresponding to the values supplied in
the specified range
+ * Get a {@link BitmapColumnIndex} corresponding to the values supplied in
the specified range.
*/
- default BitmapColumnIndex forRange(
+ BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@Nullable String endValue,
boolean endStrict
- )
- {
- return forRange(startValue, startStrict, endValue, endStrict, (index) ->
true);
- }
+ );
/**
- * Get an {@link BitmapColumnIndex} corresponding to the values supplied in
the specified range whose dictionary ids
- * also match some predicate, such as to match a prefix
+ * Get a {@link BitmapColumnIndex} corresponding to the values supplied in
the specified range whose dictionary ids
+ * also match some predicate, such as to match a prefix.
+ *
+ * If the provided {@code} matcher is always true, it's better to use the
other
+ * {@link #forRange(String, boolean, String, boolean)} method.
*/
BitmapColumnIndex forRange(
@Nullable String startValue,
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
b/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
index 72d3e63ce8..c53c1a9be9 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
@@ -22,7 +22,7 @@ package org.apache.druid.segment.column;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import javax.annotation.Nullable;
-import java.util.Set;
+import java.util.SortedSet;
/**
* Index on individual values, and provides bitmaps for the rows which contain
these values
@@ -36,7 +36,8 @@ public interface StringValueSetIndex
/**
* Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the
specified set of values (if they are
- * contained in the underlying column)
+ * contained in the underlying column). The set must be sorted using
+ * {@link
org.apache.druid.java.util.common.guava.Comparators#naturalNullsFirst()}.
*/
- BitmapColumnIndex forValues(Set<String> values);
+ BitmapColumnIndex forSortedValues(SortedSet<String> values);
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
b/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
similarity index 71%
copy from
processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
copy to
processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
index 72d3e63ce8..ef0d08ee0a 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
@@ -21,22 +21,15 @@ package org.apache.druid.segment.column;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
-import javax.annotation.Nullable;
-import java.util.Set;
+import java.nio.ByteBuffer;
+import java.util.SortedSet;
-/**
- * Index on individual values, and provides bitmaps for the rows which contain
these values
- */
-public interface StringValueSetIndex
+public interface Utf8ValueSetIndex
{
- /**
- * Get the {@link ImmutableBitmap} corresponding to the supplied value
- */
- BitmapColumnIndex forValue(@Nullable String value);
-
/**
* Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the
specified set of values (if they are
- * contained in the underlying column)
+ * contained in the underlying column). The set must be sorted using
+ * {@link
org.apache.druid.java.util.common.ByteBufferUtils#unsignedComparator()}.
*/
- BitmapColumnIndex forValues(Set<String> values);
+ BitmapColumnIndex forSortedValuesUtf8(SortedSet<ByteBuffer> valuesUtf8);
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/ConciseBitmapSerdeFactory.java
b/processing/src/main/java/org/apache/druid/segment/data/ConciseBitmapSerdeFactory.java
index b81c971b37..70a1fcc6cc 100644
---
a/processing/src/main/java/org/apache/druid/segment/data/ConciseBitmapSerdeFactory.java
+++
b/processing/src/main/java/org/apache/druid/segment/data/ConciseBitmapSerdeFactory.java
@@ -28,6 +28,7 @@ import
org.apache.druid.extendedset.intset.ImmutableConciseSet;
import java.nio.ByteBuffer;
/**
+ *
*/
public class ConciseBitmapSerdeFactory implements BitmapSerdeFactory
{
@@ -70,6 +71,12 @@ public class ConciseBitmapSerdeFactory implements
BitmapSerdeFactory
return val.toBytes();
}
+ @Override
+ public boolean canCompare()
+ {
+ return false;
+ }
+
@Override
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/DecompressingByteBufferObjectStrategy.java
b/processing/src/main/java/org/apache/druid/segment/data/DecompressingByteBufferObjectStrategy.java
index 8ae83ea751..9c77d884b7 100644
---
a/processing/src/main/java/org/apache/druid/segment/data/DecompressingByteBufferObjectStrategy.java
+++
b/processing/src/main/java/org/apache/druid/segment/data/DecompressingByteBufferObjectStrategy.java
@@ -70,6 +70,12 @@ public class DecompressingByteBufferObjectStrategy
implements ObjectStrategy<Res
};
}
+ @Override
+ public boolean canCompare()
+ {
+ return false;
+ }
+
@Override
public int compare(ResourceHolder<ByteBuffer> o1, ResourceHolder<ByteBuffer>
o2)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
index 982f7b0767..4fd6417f0f 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
@@ -24,6 +24,7 @@ import org.apache.druid.collections.ResourceHolder;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.utils.SerializerUtils;
import org.apache.druid.io.Channels;
+import org.apache.druid.java.util.common.ByteBufferUtils;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.guava.Comparators;
@@ -132,9 +133,9 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
}
@Override
- public int compare(ByteBuffer o1, ByteBuffer o2)
+ public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
{
- return o1.compareTo(o2);
+ return ByteBufferUtils.unsignedComparator().compare(o1, o2);
}
};
@@ -217,7 +218,7 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
public static <T> GenericIndexed<T> fromIterable(Iterable<T>
objectsIterable, ObjectStrategy<T> strategy)
{
- return fromIterableVersionOne(objectsIterable, strategy, true, strategy);
+ return fromIterableVersionOne(objectsIterable, strategy,
strategy.canCompare(), strategy);
}
static int getNumberOfFilesRequired(int bagSize, long numWritten)
@@ -350,17 +351,13 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
if (!allowReverseLookup) {
throw new UnsupportedOperationException("Reverse lookup not allowed.");
}
- return indexOf(this, value);
- }
- private int indexOf(Indexed<T> indexed, @Nullable T value)
- {
int minIndex = 0;
int maxIndex = size - 1;
while (minIndex <= maxIndex) {
int currIndex = (minIndex + maxIndex) >>> 1;
- T currValue = indexed.get(currIndex);
+ T currValue = get(currIndex);
int comparison = strategy.compare(currValue, value);
if (comparison == 0) {
return currIndex;
@@ -458,6 +455,9 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
return sb.toString();
}
+ /**
+ * Single-threaded view.
+ */
abstract class BufferIndexed implements Indexed<T>
{
int lastReadSize;
@@ -468,8 +468,23 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
return size;
}
+ @Override
+ public T get(final int index)
+ {
+ final ByteBuffer buf = getByteBuffer(index);
+ if (buf == null) {
+ return null;
+ }
+
+ // Traditionally, ObjectStrategy.fromByteBuffer() is given a buffer with
limit set to capacity, and the
+ // actual limit is passed along as an extra parameter.
+ final int len = buf.remaining();
+ buf.limit(buf.capacity());
+ return strategy.fromByteBuffer(buf, len);
+ }
+
@Nullable
- T bufferedIndexedGet(ByteBuffer copyValueBuffer, int startOffset, int
endOffset)
+ ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int
startOffset, int endOffset)
{
int size = endOffset - startOffset;
// When size is 0 and SQL compatibility is enabled also check for null
marker before returning null.
@@ -481,14 +496,22 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
lastReadSize = size;
// ObjectStrategy.fromByteBuffer() is allowed to reset the limit of the
buffer. So if the limit is changed,
- // position() call in the next line could throw an exception, if the
position is set beyond the new limit. clear()
- // sets the limit to the maximum possible, the capacity. It is safe to
reset the limit to capacity, because the
- // value buffer(s) initial limit equals to capacity.
- copyValueBuffer.clear();
+ // position() call could throw an exception, if the position is set
beyond the new limit. Calling limit()
+ // followed by position() is safe, because limit() resets position if
needed.
+ copyValueBuffer.limit(endOffset);
copyValueBuffer.position(startOffset);
- return strategy.fromByteBuffer(copyValueBuffer, size);
+ return copyValueBuffer;
}
+ /**
+ * Like {@link #get(int)}, but returns a {@link ByteBuffer} instead of
using the {@link ObjectStrategy}.
+ *
+ * The returned ByteBuffer is reused by future calls. Callers must discard
it before calling another method
+ * on this BufferedIndexed object that may want to reuse the buffer.
+ */
+ @Nullable
+ protected abstract ByteBuffer getByteBuffer(int index);
+
/**
* This method makes no guarantees with respect to thread safety
*
@@ -502,7 +525,41 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
@Override
public int indexOf(@Nullable T value)
{
- return GenericIndexed.this.indexOf(this, value);
+ if (!allowReverseLookup) {
+ throw new UnsupportedOperationException("Reverse lookup not allowed.");
+ }
+
+ //noinspection ObjectEquality
+ final boolean isByteBufferStrategy = strategy == BYTE_BUFFER_STRATEGY;
+
+ int minIndex = 0;
+ int maxIndex = size - 1;
+ while (minIndex <= maxIndex) {
+ int currIndex = (minIndex + maxIndex) >>> 1;
+
+ int comparison;
+
+ if (isByteBufferStrategy) {
+ // Specialization avoids ByteBuffer allocation in
strategy.fromByteBuffer.
+ ByteBuffer currValue = getByteBuffer(currIndex);
+ comparison = ByteBufferUtils.compareByteBuffers(currValue,
(ByteBuffer) value);
+ } else {
+ T currValue = get(currIndex);
+ comparison = strategy.compare(currValue, value);
+ }
+
+ if (comparison == 0) {
+ return currIndex;
+ }
+
+ if (comparison < 0) {
+ minIndex = currIndex + 1;
+ } else {
+ maxIndex = currIndex - 1;
+ }
+ }
+
+ return -(minIndex + 1);
}
@Override
@@ -625,8 +682,9 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
final ByteBuffer copyBuffer = firstValueBuffer.asReadOnlyBuffer();
return new BufferIndexed()
{
+ @Nullable
@Override
- public T get(final int index)
+ protected ByteBuffer getByteBuffer(final int index)
{
checkIndex(index);
@@ -641,7 +699,7 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
startOffset = headerBuffer.getInt(headerPosition) + Integer.BYTES;
endOffset = headerBuffer.getInt(headerPosition + Integer.BYTES);
}
- return bufferedIndexedGet(copyBuffer, startOffset, endOffset);
+ return bufferedIndexedGetByteBuffer(copyBuffer, startOffset,
endOffset);
}
@Override
@@ -734,8 +792,9 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
return new BufferIndexed()
{
+ @Nullable
@Override
- public T get(final int index)
+ protected ByteBuffer getByteBuffer(int index)
{
checkIndex(index);
@@ -753,7 +812,7 @@ public class GenericIndexed<T> implements
CloseableIndexed<T>, Serializer
endOffset = headerBuffer.getInt(headerPosition + Integer.BYTES);
}
int fileNum = index >> logBaseTwoOfElementsPerValueFile;
- return bufferedIndexedGet(copyValueBuffers[fileNum], startOffset,
endOffset);
+ return bufferedIndexedGetByteBuffer(copyValueBuffers[fileNum],
startOffset, endOffset);
}
@Override
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java
b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java
index 6db18d0c12..449ed791f3 100644
---
a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java
+++
b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java
@@ -124,6 +124,12 @@ public class GenericIndexedWriter<T> implements Serializer
val.position(valPos);
}
+ @Override
+ public boolean canCompare()
+ {
+ return false;
+ }
+
@Override
public int compare(ByteBuffer o1, ByteBuffer o2)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/ObjectStrategy.java
b/processing/src/main/java/org/apache/druid/segment/data/ObjectStrategy.java
index ed4410b821..8a53fc57a7 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/ObjectStrategy.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/ObjectStrategy.java
@@ -51,6 +51,14 @@ public interface ObjectStrategy<T> extends Comparator<T>
@Nullable
byte[] toBytes(@Nullable T val);
+ /**
+ * Whether {@link #compare} is valid or not.
+ */
+ default boolean canCompare()
+ {
+ return true;
+ }
+
/**
* Reads 4-bytes numBytes from the given buffer, and then delegates to
{@link #fromByteBuffer(ByteBuffer, int)}.
*/
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/RoaringBitmapSerdeFactory.java
b/processing/src/main/java/org/apache/druid/segment/data/RoaringBitmapSerdeFactory.java
index 92b58d91e9..035511981b 100644
---
a/processing/src/main/java/org/apache/druid/segment/data/RoaringBitmapSerdeFactory.java
+++
b/processing/src/main/java/org/apache/druid/segment/data/RoaringBitmapSerdeFactory.java
@@ -95,6 +95,12 @@ public class RoaringBitmapSerdeFactory implements
BitmapSerdeFactory
return val.toBytes();
}
+ @Override
+ public boolean canCompare()
+ {
+ return false;
+ }
+
@Override
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java
index 203c8e6a0e..e4ffb7f428 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java
@@ -94,7 +94,12 @@ public class LikeFilter implements Filter
if (rangeIndex != null) {
final String lower =
NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix());
final String upper =
NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix()) + Character.MAX_VALUE;
- return rangeIndex.forRange(lower, false, upper, false,
likeMatcher::matchesSuffixOnly);
+
+ if (likeMatcher.getSuffixMatch() ==
LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_ANY) {
+ return rangeIndex.forRange(lower, false, upper, false);
+ } else {
+ return rangeIndex.forRange(lower, false, upper, false,
likeMatcher::matchesSuffixOnly);
+ }
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/join/Joinable.java
b/processing/src/main/java/org/apache/druid/segment/join/Joinable.java
index 25957f7e9a..a09bac2f24 100644
--- a/processing/src/main/java/org/apache/druid/segment/join/Joinable.java
+++ b/processing/src/main/java/org/apache/druid/segment/join/Joinable.java
@@ -71,10 +71,10 @@ public interface Joinable extends ReferenceCountedObject
* @param condition join condition for the matcher
* @param remainderNeeded whether or not {@link
JoinMatcher#matchRemainder()} will ever be called on the
* matcher. If we know it will not,
additional optimizations are often possible.
- *
* @param descending true if join cursor is iterated in
descending order
* @param closer closer that will run after join cursor
has completed to clean up any per query
* resources the joinable uses
+ *
* @return the matcher
*/
JoinMatcher makeJoinMatcher(
@@ -89,6 +89,10 @@ public interface Joinable extends ReferenceCountedObject
* Returns all nonnull values from a particular column if they are all
unique, if there are "maxNumValues" or fewer,
* and if the column exists and supports this operation. Otherwise, returns
an empty Optional.
*
+ * The returned set may be passed to {@link
org.apache.druid.query.filter.InDimFilter}. For efficiency,
+ * implementations should prefer creating the returned set with
+ * {@code new TreeSet<String>(Comparators.naturalNullsFirst()}}. This avoids
a copy in the filter's constructor.
+ *
* @param columnName name of the column
* @param maxNumValues maximum number of values to return
*/
@@ -98,13 +102,18 @@ public interface Joinable extends ReferenceCountedObject
* Searches a column from this Joinable for a particular value, finds rows
that match,
* and returns values of a second column for those rows.
*
- * @param searchColumnName Name of the search column. This is the column
that is being used in the filter
- * @param searchColumnValue Target value of the search column. This is the
value that is being filtered on.
- * @param retrievalColumnName The column to retrieve values from. This is
the column that is being joined against.
- * @param maxCorrelationSetSize Maximum number of values to retrieve. If we
detect that more values would be
- * returned than this limit, return absent.
+ * The returned set may be passed to {@link
org.apache.druid.query.filter.InDimFilter}. For efficiency,
+ * implementations should prefer creating the returned set with
+ * {@code new TreeSet<String>(Comparators.naturalNullsFirst()}}. This avoids
a copy in the filter's constructor.
+ *
+ * @param searchColumnName Name of the search column. This is the
column that is being used in the filter
+ * @param searchColumnValue Target value of the search column. This is
the value that is being filtered on.
+ * @param retrievalColumnName The column to retrieve values from. This
is the column that is being joined against.
+ * @param maxCorrelationSetSize Maximum number of values to retrieve. If
we detect that more values would be
+ * returned than this limit, return absent.
* @param allowNonKeyColumnSearch If true, allow searchs on non-key columns.
If this is false,
* a search on a non-key column returns
absent.
+ *
* @return The set of correlated column values. If we cannot determine
correlated values, return absent.
*
* In case either the search or retrieval column names are not found, this
will return absent.
diff --git
a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java
b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java
index c230b253fc..840728c784 100644
---
a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java
+++
b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java
@@ -33,7 +33,6 @@ import org.apache.druid.segment.join.Joinable;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
-import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
@@ -104,10 +103,7 @@ public class IndexedTableJoinable implements Joinable
try (final IndexedTable.Reader reader =
table.columnReader(columnPosition)) {
// Sorted set to encourage "in" filters that result from this method to
do dictionary lookups in order.
// The hopes are that this will improve locality and therefore improve
performance.
- //
- // Note: we are using Comparators.naturalNullsFirst() because it
prevents the need for lambda-wrapping in
- // InDimFilter's "createStringPredicate" method.
- final Set<String> allValues = new
TreeSet<>(Comparators.naturalNullsFirst());
+ final Set<String> allValues = createValuesSet();
for (int i = 0; i < table.numRows(); i++) {
final String s =
DimensionHandlerUtils.convertObjectToString(reader.read(i));
@@ -147,7 +143,7 @@ public class IndexedTableJoinable implements Joinable
return Optional.empty();
}
try (final Closer closer = Closer.create()) {
- Set<String> correlatedValues = new HashSet<>();
+ Set<String> correlatedValues = createValuesSet();
if (table.keyColumns().contains(searchColumnName)) {
IndexedTable.Index index = table.columnIndex(filterColumnPosition);
IndexedTable.Reader reader =
table.columnReader(correlatedColumnPosition);
@@ -196,4 +192,12 @@ public class IndexedTableJoinable implements Joinable
{
return table.acquireReferences();
}
+
+ /**
+ * Create a Set that InDimFilter will accept without incurring a copy.
+ */
+ private static Set<String> createValuesSet()
+ {
+ return new TreeSet<>(Comparators.naturalNullsFirst());
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
index 286e3702cc..bf1e66df35 100644
---
a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
+++
b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
@@ -365,6 +365,7 @@ public class DictionaryEncodedColumnPartSerde implements
ColumnPartSerde
new DictionaryEncodedStringIndexSupplier(
bitmapSerdeFactory.getBitmapFactory(),
rDictionary,
+ rDictionaryUtf8,
rBitmaps,
rSpatialIndex
),
diff --git
a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
index dffd624d90..0a1ee09d51 100644
---
a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
@@ -20,6 +20,7 @@
package org.apache.druid.segment.serde;
import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
import it.unimi.dsi.fastutil.ints.IntIntImmutablePair;
import it.unimi.dsi.fastutil.ints.IntIntPair;
import it.unimi.dsi.fastutil.ints.IntIterator;
@@ -27,6 +28,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.ImmutableRTree;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.IntListUtils;
@@ -39,13 +41,16 @@ import
org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
import org.apache.druid.segment.column.SpatialIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.Utf8ValueSetIndex;
import org.apache.druid.segment.data.GenericIndexed;
+import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.filter.Filters;
import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NoSuchElementException;
-import java.util.Set;
+import java.util.SortedSet;
public class DictionaryEncodedStringIndexSupplier implements
ColumnIndexSupplier
{
@@ -53,6 +58,7 @@ public class DictionaryEncodedStringIndexSupplier implements
ColumnIndexSupplier
private final BitmapFactory bitmapFactory;
private final GenericIndexed<String> dictionary;
+ private final GenericIndexed<ByteBuffer> dictionaryUtf8;
@Nullable
private final GenericIndexed<ImmutableBitmap> bitmaps;
@Nullable
@@ -61,29 +67,38 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
public DictionaryEncodedStringIndexSupplier(
BitmapFactory bitmapFactory,
GenericIndexed<String> dictionary,
- GenericIndexed<ImmutableBitmap> bitmaps,
- ImmutableRTree indexedTree
+ GenericIndexed<ByteBuffer> dictionaryUtf8,
+ @Nullable GenericIndexed<ImmutableBitmap> bitmaps,
+ @Nullable ImmutableRTree indexedTree
)
{
this.bitmapFactory = bitmapFactory;
- this.bitmaps = bitmaps;
this.dictionary = dictionary;
+ this.dictionaryUtf8 = dictionaryUtf8;
+ this.bitmaps = bitmaps;
this.indexedTree = indexedTree;
}
@Nullable
@Override
+ @SuppressWarnings("unchecked")
public <T> T as(Class<T> clazz)
{
- if (clazz.equals(StringValueSetIndex.class)) {
- return (T) new
GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionary,
bitmaps);
- } else if (clazz.equals(DruidPredicateIndex.class)) {
+ if (bitmaps != null && clazz.equals(StringValueSetIndex.class)) {
+ return (T) new
GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory,
dictionaryUtf8, bitmaps);
+ } else if (bitmaps != null && clazz.equals(Utf8ValueSetIndex.class)) {
+ return (T) new
GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory,
dictionaryUtf8, bitmaps);
+ } else if (bitmaps != null && clazz.equals(DruidPredicateIndex.class)) {
return (T) new
GenericIndexedDictionaryEncodedStringDruidPredicateIndex(bitmapFactory,
dictionary, bitmaps);
- } else if (clazz.equals(LexicographicalRangeIndex.class)) {
- return (T) new
GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(bitmapFactory,
dictionary, bitmaps);
- } else if (clazz.equals(DictionaryEncodedStringValueIndex.class)) {
+ } else if (bitmaps != null &&
clazz.equals(LexicographicalRangeIndex.class)) {
+ return (T) new
GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
+ bitmapFactory,
+ dictionaryUtf8,
+ bitmaps
+ );
+ } else if (bitmaps != null &&
clazz.equals(DictionaryEncodedStringValueIndex.class)) {
return (T) new
GenericIndexedDictionaryEncodedStringValueIndex(bitmapFactory, dictionary,
bitmaps);
- } else if (clazz.equals(SpatialIndex.class)) {
+ } else if (indexedTree != null && clazz.equals(SpatialIndex.class)) {
return (T) (SpatialIndex) () -> indexedTree;
}
return null;
@@ -98,21 +113,21 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
}
- private abstract static class BaseGenericIndexedDictionaryEncodedStringIndex
+ private abstract static class BaseGenericIndexedDictionaryEncodedIndex<T>
{
protected final BitmapFactory bitmapFactory;
- protected final GenericIndexed<String> dictionary;
- protected final GenericIndexed<ImmutableBitmap> bitmaps;
+ protected final Indexed<T> dictionary;
+ protected final Indexed<ImmutableBitmap> bitmaps;
- protected BaseGenericIndexedDictionaryEncodedStringIndex(
+ protected BaseGenericIndexedDictionaryEncodedIndex(
BitmapFactory bitmapFactory,
- GenericIndexed<String> dictionary,
+ GenericIndexed<T> dictionary,
GenericIndexed<ImmutableBitmap> bitmaps
)
{
this.bitmapFactory = bitmapFactory;
- this.dictionary = dictionary;
- this.bitmaps = bitmaps;
+ this.dictionary = dictionary.singleThreaded();
+ this.bitmaps = bitmaps.singleThreaded();
}
public ImmutableBitmap getBitmap(int idx)
@@ -127,7 +142,7 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
public static final class GenericIndexedDictionaryEncodedStringValueIndex
- extends BaseGenericIndexedDictionaryEncodedStringIndex implements
DictionaryEncodedStringValueIndex
+ extends BaseGenericIndexedDictionaryEncodedIndex<String> implements
DictionaryEncodedStringValueIndex
{
public GenericIndexedDictionaryEncodedStringValueIndex(
BitmapFactory bitmapFactory,
@@ -165,12 +180,13 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
public static final class GenericIndexedDictionaryEncodedStringValueSetIndex
- extends BaseGenericIndexedDictionaryEncodedStringIndex implements
StringValueSetIndex
+ extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements
StringValueSetIndex, Utf8ValueSetIndex
{
+ private static final int SIZE_WORTH_CHECKING_MIN = 8;
public GenericIndexedDictionaryEncodedStringValueSetIndex(
BitmapFactory bitmapFactory,
- GenericIndexed<String> dictionary,
+ GenericIndexed<ByteBuffer> dictionary,
GenericIndexed<ImmutableBitmap> bitmaps
)
{
@@ -197,14 +213,43 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
private ImmutableBitmap getBitmapForValue()
{
- final int idx = dictionary.indexOf(value);
+ final ByteBuffer valueUtf8 = value == null ? null :
ByteBuffer.wrap(StringUtils.toUtf8(value));
+ final int idx = dictionary.indexOf(valueUtf8);
return getBitmap(idx);
}
};
}
@Override
- public BitmapColumnIndex forValues(Set<String> values)
+ public BitmapColumnIndex forSortedValues(SortedSet<String> values)
+ {
+ return getBitmapColumnIndexForSortedIterableUtf8(
+ Iterables.transform(
+ values,
+ input -> ByteBuffer.wrap(StringUtils.toUtf8(input))
+ )
+ );
+ }
+
+ @Override
+ public BitmapColumnIndex forSortedValuesUtf8(SortedSet<ByteBuffer>
valuesUtf8)
+ {
+ final SortedSet<ByteBuffer> tailSet;
+
+ if (valuesUtf8.size() >= SIZE_WORTH_CHECKING_MIN) {
+ final ByteBuffer minValueInColumn = dictionary.get(0);
+ tailSet = valuesUtf8.tailSet(minValueInColumn);
+ } else {
+ tailSet = valuesUtf8;
+ }
+
+ return getBitmapColumnIndexForSortedIterableUtf8(tailSet);
+ }
+
+ /**
+ * Helper used by {@link #forSortedValues} and {@link
#forSortedValuesUtf8}.
+ */
+ private BitmapColumnIndex
getBitmapColumnIndexForSortedIterableUtf8(Iterable<ByteBuffer> valuesUtf8)
{
return new DictionaryEncodedStringBitmapColumnIndex()
{
@@ -222,9 +267,11 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
private Iterable<ImmutableBitmap> getBitmapsIterable()
{
+ final int dictionarySize = dictionary.size();
+
return () -> new Iterator<ImmutableBitmap>()
{
- final Iterator<String> iterator = values.iterator();
+ final Iterator<ByteBuffer> iterator = valuesUtf8.iterator();
int next = -1;
@Override
@@ -253,8 +300,15 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
private void findNext()
{
while (next < 0 && iterator.hasNext()) {
- String nextValue = iterator.next();
+ ByteBuffer nextValue = iterator.next();
next = dictionary.indexOf(nextValue);
+
+ if (next == -dictionarySize - 1) {
+ // nextValue is past the end of the dictionary.
+ // Note: we can rely on indexOf returning (-(insertion
point) - 1), even though Indexed doesn't
+ // guarantee it, because "dictionary" comes from
GenericIndexed singleThreaded().
+ break;
+ }
}
}
};
@@ -264,7 +318,7 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
public static final class
GenericIndexedDictionaryEncodedStringDruidPredicateIndex
- extends BaseGenericIndexedDictionaryEncodedStringIndex implements
DruidPredicateIndex
+ extends BaseGenericIndexedDictionaryEncodedIndex<String> implements
DruidPredicateIndex
{
public GenericIndexedDictionaryEncodedStringDruidPredicateIndex(
BitmapFactory bitmapFactory,
@@ -350,12 +404,12 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
public static final class
GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex
- extends BaseGenericIndexedDictionaryEncodedStringIndex implements
LexicographicalRangeIndex
+ extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements
LexicographicalRangeIndex
{
public GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
BitmapFactory bitmapFactory,
- GenericIndexed<String> dictionary,
+ GenericIndexed<ByteBuffer> dictionary,
GenericIndexed<ImmutableBitmap> bitmaps
)
{
@@ -447,7 +501,7 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
private int findNext()
{
- while (currIndex < end &&
!matcher.apply(dictionary.get(currIndex))) {
+ while (currIndex < end &&
!applyMatcher(dictionary.get(currIndex))) {
currIndex++;
}
@@ -478,16 +532,32 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
}
};
}
+
+ private boolean applyMatcher(@Nullable final ByteBuffer valueUtf8)
+ {
+ if (valueUtf8 == null) {
+ return matcher.apply(null);
+ } else {
+ // Duplicate buffer, because StringUtils.fromUtf8 advances the
position, and we do not want to do that.
+ return matcher.apply(StringUtils.fromUtf8(valueUtf8.duplicate()));
+ }
+ }
};
}
- private IntIntPair getRange(@Nullable String startValue, boolean
startStrict, @Nullable String endValue, boolean endStrict)
+ private IntIntPair getRange(
+ @Nullable String startValue,
+ boolean startStrict,
+ @Nullable String endValue,
+ boolean endStrict
+ )
{
int startIndex, endIndex;
if (startValue == null) {
startIndex = 0;
} else {
- final int found =
dictionary.indexOf(NullHandling.emptyToNullIfNeeded(startValue));
+ final String startValueToUse =
NullHandling.emptyToNullIfNeeded(startValue);
+ final int found =
dictionary.indexOf(StringUtils.toUtf8ByteBuffer(startValueToUse));
if (found >= 0) {
startIndex = startStrict ? found + 1 : found;
} else {
@@ -498,7 +568,8 @@ public class DictionaryEncodedStringIndexSupplier
implements ColumnIndexSupplier
if (endValue == null) {
endIndex = dictionary.size();
} else {
- final int found =
dictionary.indexOf(NullHandling.emptyToNullIfNeeded(endValue));
+ final String endValueToUse =
NullHandling.emptyToNullIfNeeded(endValue);
+ final int found =
dictionary.indexOf(StringUtils.toUtf8ByteBuffer(endValueToUse));
if (found >= 0) {
endIndex = endStrict ? found : found + 1;
} else {
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
index f6bb879b66..032542efd1 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.BitmapResultFactory;
@@ -60,6 +61,7 @@ import java.util.List;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
+import java.util.SortedSet;
/**
* {@link VirtualColumn} form of {@link ListFilteredDimensionSpec}, powered by
@@ -397,7 +399,7 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
}
@Override
- public BitmapColumnIndex forValues(Set<String> values)
+ public BitmapColumnIndex forSortedValues(SortedSet<String> values)
{
return new BaseVirtualIndex()
{
@@ -505,6 +507,17 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
super(delegate, idMapping);
}
+ @Override
+ public BitmapColumnIndex forRange(
+ @Nullable String startValue,
+ boolean startStrict,
+ @Nullable String endValue,
+ boolean endStrict
+ )
+ {
+ return forRange(startValue, startStrict, endValue, endStrict,
Predicates.alwaysTrue());
+ }
+
@Override
public BitmapColumnIndex forRange(
@Nullable String startValue,
diff --git
a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
index 0c933b67e9..ddcf978528 100644
---
a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
@@ -31,11 +31,20 @@ import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.extraction.RegexDimExtractionFn;
import org.apache.druid.segment.RowAdapters;
import org.apache.druid.segment.RowBasedColumnSelectorFactory;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.Utf8ValueSetIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
+import org.junit.Rule;
import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoRule;
+import org.mockito.quality.Strictness;
import java.io.IOException;
import java.util.Arrays;
@@ -52,6 +61,9 @@ public class InDimFilterTest extends
InitializedNullHandlingTest
private final String serializedFilter =
"{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"]}";
+ @Rule
+ public MockitoRule mockitoRule =
MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS);
+
@Test
public void testDeserialization() throws IOException
{
@@ -71,21 +83,23 @@ public class InDimFilterTest extends
InitializedNullHandlingTest
@Test
public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet()
{
- final Set<String> values = ImmutableSet.of("v1", "v2", "v3");
- final InDimFilter filter = new InDimFilter("dim", values, null, null);
+ final Set<String> values = new InDimFilter.ValuesSet();
+ values.addAll(Arrays.asList("v1", "v2", "v3"));
+ final InDimFilter filter = new InDimFilter("dim", values);
Assert.assertSame(values, filter.getValues());
}
@Test
public void testGetValuesWithValuesSetIncludingEmptyString()
{
- final Set<String> values = Sets.newHashSet("v1", "", "v3");
- final InDimFilter filter = new InDimFilter("dim", values, null, null);
+ final InDimFilter.ValuesSet values = new
InDimFilter.ValuesSet(ImmutableSet.of("v1", "", "v3"));
+ final InDimFilter filter = new InDimFilter("dim", values);
if (NullHandling.replaceWithDefault()) {
- Assert.assertNotSame(values, filter.getValues());
+ Assert.assertSame(values, filter.getValues());
Assert.assertEquals(Sets.newHashSet("v1", null, "v3"),
filter.getValues());
} else {
Assert.assertSame(values, filter.getValues());
+ Assert.assertEquals(Sets.newHashSet("v1", "", "v3"), filter.getValues());
}
}
@@ -234,4 +248,53 @@ public class InDimFilterTest extends
InitializedNullHandlingTest
// Now it *shouldn't* match.
Assert.assertFalse(matcher.matches());
}
+
+ @Test
+ public void testUsesUtf8SetIndex()
+ {
+ // An implementation test.
+ // This test confirms that "in" filters use utf8 index lookups when
available.
+
+ final Filter inFilter = new InDimFilter("dim0", ImmutableSet.of("v1",
"v2")).toFilter();
+
+ final ColumnIndexSelector indexSelector =
Mockito.mock(ColumnIndexSelector.class);
+ final ColumnIndexSupplier indexSupplier =
Mockito.mock(ColumnIndexSupplier.class);
+ final Utf8ValueSetIndex valueIndex = Mockito.mock(Utf8ValueSetIndex.class);
+ final BitmapColumnIndex bitmapColumnIndex =
Mockito.mock(BitmapColumnIndex.class);
+
+ final InDimFilter.ValuesSet expectedValuesSet = new
InDimFilter.ValuesSet();
+ expectedValuesSet.addAll(Arrays.asList("v1", "v2"));
+
+
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
+
Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(valueIndex);
+
Mockito.when(valueIndex.forSortedValuesUtf8(expectedValuesSet.toUtf8())).thenReturn(bitmapColumnIndex);
+
+ final BitmapColumnIndex retVal =
inFilter.getBitmapColumnIndex(indexSelector);
+ Assert.assertSame("inFilter returns the intended bitmapColumnIndex",
bitmapColumnIndex, retVal);
+ }
+
+ @Test
+ public void testUsesStringSetIndex()
+ {
+ // An implementation test.
+ // This test confirms that "in" filters use non-utf8 string index lookups
when utf8 indexes are not available.
+
+ final Filter inFilter = new InDimFilter("dim0", ImmutableSet.of("v1",
"v2")).toFilter();
+
+ final ColumnIndexSelector indexSelector =
Mockito.mock(ColumnIndexSelector.class);
+ final ColumnIndexSupplier indexSupplier =
Mockito.mock(ColumnIndexSupplier.class);
+ final StringValueSetIndex valueIndex =
Mockito.mock(StringValueSetIndex.class);
+ final BitmapColumnIndex bitmapColumnIndex =
Mockito.mock(BitmapColumnIndex.class);
+
+ final InDimFilter.ValuesSet expectedValuesSet = new
InDimFilter.ValuesSet();
+ expectedValuesSet.addAll(Arrays.asList("v1", "v2"));
+
+
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
+ Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(null);
// Will check for UTF-8 first.
+
Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex);
+
Mockito.when(valueIndex.forSortedValues(expectedValuesSet)).thenReturn(bitmapColumnIndex);
+
+ final BitmapColumnIndex retVal =
inFilter.getBitmapColumnIndex(indexSelector);
+ Assert.assertSame("inFilter returns the intended bitmapColumnIndex",
bitmapColumnIndex, retVal);
+ }
}
diff --git
a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
index ef1a97005c..c51ec817b7 100644
---
a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
@@ -24,15 +24,27 @@ import com.google.common.collect.Sets;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.LexicographicalRangeIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
+import org.junit.Rule;
import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoRule;
+import org.mockito.quality.Strictness;
import java.io.IOException;
import java.util.Arrays;
public class LikeDimFilterTest extends InitializedNullHandlingTest
{
+ @Rule
+ public MockitoRule mockitoRule =
MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS);
+
@Test
public void testSerde() throws IOException
{
@@ -88,4 +100,49 @@ public class LikeDimFilterTest extends
InitializedNullHandlingTest
.withNonnullFields("suffixMatch", "prefix", "pattern")
.verify();
}
+
+ @Test
+ public void testPrefixMatchUsesRangeIndex()
+ {
+ // An implementation test.
+ // This test confirms that "like" filters with prefix matchers use
index-range lookups without matcher predicates.
+
+ final Filter likeFilter = new LikeDimFilter("dim0", "f%", null, null,
null).toFilter();
+
+ final ColumnIndexSelector indexSelector =
Mockito.mock(ColumnIndexSelector.class);
+ final ColumnIndexSupplier indexSupplier =
Mockito.mock(ColumnIndexSupplier.class);
+ final LexicographicalRangeIndex rangeIndex =
Mockito.mock(LexicographicalRangeIndex.class);
+ final BitmapColumnIndex bitmapColumnIndex =
Mockito.mock(BitmapColumnIndex.class);
+
+
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
+
Mockito.when(indexSupplier.as(LexicographicalRangeIndex.class)).thenReturn(rangeIndex);
+ Mockito.when(
+ // Verify that likeFilter uses forRange without a matcher predicate;
it's unnecessary and slows things down
+ rangeIndex.forRange("f", false, "f" + Character.MAX_VALUE, false)
+ ).thenReturn(bitmapColumnIndex);
+
+ final BitmapColumnIndex retVal =
likeFilter.getBitmapColumnIndex(indexSelector);
+ Assert.assertSame("likeFilter returns the intended bitmapColumnIndex",
bitmapColumnIndex, retVal);
+ }
+
+ @Test
+ public void testExactMatchUsesValueIndex()
+ {
+ // An implementation test.
+ // This test confirms that "like" filters with exact matchers use index
lookups.
+
+ final Filter likeFilter = new LikeDimFilter("dim0", "f", null, null,
null).toFilter();
+
+ final ColumnIndexSelector indexSelector =
Mockito.mock(ColumnIndexSelector.class);
+ final ColumnIndexSupplier indexSupplier =
Mockito.mock(ColumnIndexSupplier.class);
+ final StringValueSetIndex valueIndex =
Mockito.mock(StringValueSetIndex.class);
+ final BitmapColumnIndex bitmapColumnIndex =
Mockito.mock(BitmapColumnIndex.class);
+
+
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
+
Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex);
+ Mockito.when(valueIndex.forValue("f")).thenReturn(bitmapColumnIndex);
+
+ final BitmapColumnIndex retVal =
likeFilter.getBitmapColumnIndex(indexSelector);
+ Assert.assertSame("likeFilter returns the intended bitmapColumnIndex",
bitmapColumnIndex, retVal);
+ }
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
index c37996d035..268a4a2c97 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
@@ -26,6 +26,7 @@ import
org.apache.druid.collections.bitmap.ConciseBitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.DimExtractionFn;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ColumnIndexSelector;
@@ -49,6 +50,7 @@ import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Map;
@@ -117,6 +119,10 @@ public class ExtractionDimFilterTest extends
InitializedNullHandlingTest
return new DictionaryEncodedStringIndexSupplier(
factory,
GenericIndexed.fromIterable(Collections.singletonList("foo1"),
GenericIndexed.STRING_STRATEGY),
+ GenericIndexed.fromIterable(
+
Collections.singletonList(ByteBuffer.wrap(StringUtils.toUtf8("foo1"))),
+ GenericIndexed.BYTE_BUFFER_STRATEGY
+ ),
GenericIndexed.fromIterable(Collections.singletonList(foo1BitMap),
serdeFactory.getObjectStrategy()),
null
);
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
index 0d02cd9b1f..7981fdf3d5 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
@@ -386,7 +386,7 @@ public class InFilterTest extends BaseFilterTest
EqualsVerifier.forClass(InDimFilter.class)
.usingGetClass()
.withNonnullFields("dimension", "values")
- .withIgnoredFields("cacheKeySupplier", "predicateFactory",
"cachedOptimizedFilter")
+ .withIgnoredFields("cacheKeySupplier", "predicateFactory",
"cachedOptimizedFilter", "valuesUtf8")
.verify();
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
index 813db7d4cd..b6fd9f4f0e 100644
---
a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
@@ -242,7 +242,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("US")).toFilter()
)
),
new SelectorFilter("rtc.countryName", "United States"),
@@ -649,7 +649,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
))
)
),
- new InDimFilter("countryIsoCode", ImmutableSet.of("CA"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("CA")).toFilter()
)
),
new AndFilter(
@@ -751,7 +751,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("SU"), null, null).toFilter()
+ new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("SU")).toFilter()
)
),
new SelectorFilter("rtc.countryName", "States United"),
@@ -898,7 +898,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new OrFilter(
ImmutableList.of(
new SelectorFilter("channel", "#ko.wikipedia"),
- new InDimFilter("countryIsoCode",
ImmutableSet.of("US"), null, null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("US")).toFilter()
)
),
new OrFilter(
@@ -906,8 +906,8 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new SelectorFilter("channel", "#ko.wikipedia"),
new AndFilter(
ImmutableList.of(
- new InDimFilter("countryIsoCode",
ImmutableSet.of("US"), null, null).toFilter(),
- new InDimFilter("regionIsoCode",
ImmutableSet.of("VA"), null, null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("US")).toFilter(),
+ new InDimFilter("regionIsoCode",
ImmutableSet.of("VA")).toFilter()
)
)
)
@@ -1004,7 +1004,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("USCA"), null, null).toFilter()
+ new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("USCA")).toFilter()
)
),
new SelectorFilter("c1.countryName", "Usca"),
@@ -1094,7 +1094,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("USCA"), null, null).toFilter()
+ new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0",
ImmutableSet.of("USCA")).toFilter()
)
),
new SelectorFilter("c1.v", "Usca"),
@@ -1164,7 +1164,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#de.wikipedia"),
- new InDimFilter("countryIsoCode", ImmutableSet.of("DE"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("DE")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName",
"Germany"),
@@ -1221,7 +1221,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#de.wikipedia"),
- new InDimFilter("countryIsoCode", ImmutableSet.of("DE"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("DE")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "v",
"Germany"),
@@ -1453,7 +1453,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("countryNumber", ImmutableSet.of("0"), null,
null).toFilter()
+ new InDimFilter("countryNumber",
ImmutableSet.of("0")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_NUMBER_PREFIX + "countryName",
"Australia"),
@@ -1516,7 +1516,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#en.wikipedia"),
- new InDimFilter("countryNumber", ImmutableSet.of("0"), null,
null).toFilter()
+ new InDimFilter("countryNumber",
ImmutableSet.of("0")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_NUMBER_PREFIX + "v",
"Australia"),
@@ -1683,7 +1683,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#es.wikipedia"),
- new InDimFilter("countryIsoCode", ImmutableSet.of("SV"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("SV")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName",
"El Salvador"),
@@ -1740,7 +1740,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new AndFilter(
ImmutableList.of(
new SelectorFilter("channel", "#es.wikipedia"),
- new InDimFilter("countryIsoCode", ImmutableSet.of("SV"), null,
null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("SV")).toFilter()
)
),
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "v", "El
Salvador"),
@@ -1916,8 +1916,8 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
new AndFilter(
ImmutableList.of(
- new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM"),
null, null).toFilter(),
- new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM"),
null, null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("MMMM")).toFilter(),
+ new InDimFilter("regionIsoCode",
ImmutableSet.of("MMMM")).toFilter()
)
),
new SelectorFilter("r1.regionName", "Fourems Province"),
@@ -1987,7 +1987,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
new OrFilter(
ImmutableList.of(
- new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM"),
null, null).toFilter(),
+ new InDimFilter("regionIsoCode",
ImmutableSet.of("MMMM")).toFilter(),
new SelectorFilter("regionIsoCode", "AAAA")
)
),
@@ -2288,7 +2288,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
null
)),
expressionFilter,
- new InDimFilter("rtc.countryIsoCode", ImmutableSet.of("CA", "CA2",
"CA3"), null, null).toFilter(),
+ new InDimFilter("rtc.countryIsoCode", ImmutableSet.of("CA", "CA2",
"CA3")).toFilter(),
new OrFilter(
ImmutableList.of(
new SelectorFilter("channel", "#fr.wikipedia"),
@@ -2414,7 +2414,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
ImmutableList.of(
new SelectorFilter(rewrittenRegionIsoCodeColumnName, "ON"),
new SelectorFilter(rewrittenCountryIsoCodeColumnName, "CA"),
- new InDimFilter(rewrittenCountryIsoCodeColumnName,
ImmutableSet.of("CA"), null, null).toFilter(),
+ new InDimFilter(rewrittenCountryIsoCodeColumnName,
ImmutableSet.of("CA")).toFilter(),
new BoundFilter(new BoundDimFilter(
rewrittenCountryIsoCodeColumnName,
"CA",
@@ -2427,9 +2427,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
)),
new InDimFilter(
rewrittenCountryIsoCodeColumnName,
- ImmutableSet.of("CA", "CA2", "CA3"),
- null,
- null
+ ImmutableSet.of("CA", "CA2", "CA3")
).toFilter(),
new OrFilter(
ImmutableList.of(
@@ -2453,9 +2451,7 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
new SelectorFilter(rewrittenCountryIsoCodeColumnName,
"ABCDEF"),
new InDimFilter(
rewrittenCountryIsoCodeColumnName,
- ImmutableSet.of("CA"),
- null,
- null
+ ImmutableSet.of("CA")
).toFilter(),
new BoundFilter(new BoundDimFilter(
rewrittenCountryIsoCodeColumnName,
@@ -2549,10 +2545,10 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentStorageAdapterTes
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
new AndFilter(
ImmutableList.of(
- new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null,
null).toFilter(),
- new InDimFilter("regionIsoCode", ImmutableSet.of("CA"), null,
null).toFilter(),
- new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM",
"AAAA"), null, null).toFilter(),
- new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM",
"AAAA"), null, null).toFilter()
+ new InDimFilter("countryIsoCode",
ImmutableSet.of("US")).toFilter(),
+ new InDimFilter("regionIsoCode",
ImmutableSet.of("CA")).toFilter(),
+ new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM",
"AAAA")).toFilter(),
+ new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM",
"AAAA")).toFilter()
)
),
originalFilter,
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
index 78edea3a61..dbb5ab4fbf 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
@@ -19,7 +19,6 @@
package org.apache.druid.sql.calcite.expression.builtin;
-import com.google.common.collect.Sets;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
@@ -40,6 +39,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
import javax.annotation.Nullable;
+import java.util.Arrays;
import java.util.List;
public class ArrayOverlapOperatorConversion extends
BaseExpressionDimFilterOperatorConversion
@@ -123,7 +123,7 @@ public class ArrayOverlapOperatorConversion extends
BaseExpressionDimFilterOpera
} else {
return new InDimFilter(
simpleExtractionExpr.getSimpleExtraction().getColumn(),
- Sets.newHashSet(arrayElements),
+ new InDimFilter.ValuesSet(Arrays.asList(arrayElements)),
simpleExtractionExpr.getSimpleExtraction().getExtractionFn(),
null
);
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java
b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java
index 631fd93776..29ea371298 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java
@@ -20,7 +20,6 @@
package org.apache.druid.sql.calcite.filtration;
import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.InDimFilter;
@@ -34,7 +33,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
public class ConvertSelectorsToIns extends BottomUpTransform
{
@@ -80,7 +78,7 @@ public class ConvertSelectorsToIns extends BottomUpTransform
final List<SelectorDimFilter> filterList = entry.getValue();
if (filterList.size() > 1) {
// We found a simplification. Remove the old filters and add new
ones.
- final Set<String> values =
Sets.newHashSetWithExpectedSize(filterList.size());
+ final InDimFilter.ValuesSet values = new InDimFilter.ValuesSet();
for (final SelectorDimFilter selector : filterList) {
values.add(selector.getValue());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]