This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b8f62f464a7 add DictionaryEncodedValueIndex.getValueIterator and use
it for ExpressionPredicateIndexSupplier (#19023)
b8f62f464a7 is described below
commit b8f62f464a7277720bdb95459b39852476189e9b
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Feb 18 13:20:03 2026 -0800
add DictionaryEncodedValueIndex.getValueIterator and use it for
ExpressionPredicateIndexSupplier (#19023)
changes:
* Added `getValueIterator` method to `DictionaryEncodedValueIndex` to give
an easy way for consumers to iterate the dictionary values in order
* `ExpressionPredicateIndexSupplier` now uses `getValueIterator` to scan
the dictionary values, offering a performance improvement, particularly when
using front-coding
* Fixed a few other places that iterated the dictionary with index-based
`get` calls so that they use an iterator instead
---
.../benchmark/query/SqlExpressionBenchmark.java | 3 +-
.../expr/ExpressionPredicateIndexSupplier.java | 7 +++-
.../druid/query/metadata/SegmentAnalyzer.java | 8 +++-
.../druid/query/search/UseIndexesStrategy.java | 9 ++++-
.../segment/DictionaryEncodedColumnMerger.java | 4 +-
...xedStringDictionaryEncodedStringValueIndex.java | 7 ++++
.../semantic/DictionaryEncodedValueIndex.java | 6 +++
.../nested/NestedFieldColumnIndexSupplier.java | 46 +++++++++++++++++-----
.../nested/ScalarDoubleColumnAndIndexSupplier.java | 23 ++++++++++-
.../nested/ScalarLongColumnAndIndexSupplier.java | 23 ++++++++++-
.../segment/virtual/ListFilteredVirtualColumn.java | 21 ++++++++++
.../java/org/apache/druid/cli/DumpSegment.java | 26 ++++++------
12 files changed, 150 insertions(+), 33 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index cc9e7b888da..8988973f982 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -159,7 +159,8 @@ public class SqlExpressionBenchmark extends
SqlBaseQueryBenchmark
"SELECT CASE WHEN MOD(long1, 2) = 0 THEN -1 WHEN MOD(long1, 2) = 1 THEN
long2 / MOD(long1, 2) ELSE long3 END FROM expressions GROUP BY 1",
// cast
"SELECT CAST(string1 as BIGINT) + CAST(string3 as DOUBLE) + long3,
COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2",
- "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT))
FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0"
+ "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT))
FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0",
+ "SELECT COUNT(*) FROM expressions WHERE __time >= TIMESTAMP '2000-01-01
00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' AND
(UPPER(COALESCE(string3,'')) LIKE '1%' OR TRIM(UPPER(COALESCE(string3,'')))
LIKE '1%' OR SUBSTRING(UPPER(COALESCE(string3,'')),1,1) IN
('1','2','3','4','5') OR ('X' || UPPER(COALESCE(string3,''))) LIKE 'X1%') AND
(UPPER(COALESCE(string5,'')) LIKE '2%' OR TRIM(UPPER(COALESCE(string5,'')))
LIKE '2%' OR SUBSTRING(UPPER(COALESCE(string5,'')),1,1) IN ('1','2 [...]
);
@Param({
diff --git
a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
index d2c04770b7f..a8a9a762938 100644
---
a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
@@ -225,13 +225,16 @@ public class ExpressionPredicateIndexSupplier implements
ColumnIndexSupplier
private abstract static class BitmapIterator implements
Iterator<ImmutableBitmap>
{
private final DictionaryEncodedValueIndex<?> inputColumnIndexes;
+
int next;
int index = 0;
boolean nextSet = false;
+ private final Iterator<?> valuesIterator;
private BitmapIterator(DictionaryEncodedValueIndex<?> inputColumnIndexes)
{
this.inputColumnIndexes = inputColumnIndexes;
+ this.valuesIterator = inputColumnIndexes.getValueIterator();
}
@Override
@@ -258,8 +261,8 @@ public class ExpressionPredicateIndexSupplier implements
ColumnIndexSupplier
private void findNext()
{
- while (!nextSet && index < inputColumnIndexes.getCardinality()) {
- Object nextValue = inputColumnIndexes.getValue(index);
+ while (!nextSet && valuesIterator.hasNext()) {
+ final Object nextValue = valuesIterator.next();
nextSet = nextMatches(nextValue);
if (nextSet) {
next = index;
diff --git
a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
index dd484e9a707..1ebf6e5cc7f 100644
---
a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
+++
b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
@@ -54,6 +54,7 @@ import org.apache.druid.segment.serde.ComplexMetrics;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.EnumSet;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
@@ -212,11 +213,14 @@ public class SegmentAnalyzer
if (valueIndex != null) {
cardinality = valueIndex.getCardinality();
if (analyzingSize()) {
- for (int i = 0; i < cardinality; ++i) {
- String value = valueIndex.getValue(i);
+ final Iterator<String> valueIterator = valueIndex.getValueIterator();
+ int i = 0;
+ while (valueIterator.hasNext()) {
+ final String value = valueIterator.next();
if (value != null) {
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * ((long)
valueIndex.getBitmap(i).size());
}
+ i++;
}
}
if (analyzingMinMax() && cardinality > 0) {
diff --git
a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java
b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java
index 35bfbbb3838..bd1eafa170e 100644
---
a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java
+++
b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java
@@ -51,6 +51,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
public class UseIndexesStrategy extends SearchStrategy
@@ -305,9 +306,12 @@ public class UseIndexesStrategy extends SearchStrategy
// these were checked to be non-null in partitionDimensionList
final DictionaryEncodedStringValueIndex bitmapIndex =
indexSupplier.as(DictionaryEncodedStringValueIndex.class);
- for (int i = 0; i < bitmapIndex.getCardinality(); ++i) {
- String dimVal = extractionFn.apply(bitmapIndex.getValue(i));
+ final Iterator<String> iterator = bitmapIndex.getValueIterator();
+ int i = 0;
+ while (iterator.hasNext()) {
+ final String dimVal = extractionFn.apply(iterator.next());
if (!searchQuerySpec.accept(dimVal)) {
+ i++;
continue;
}
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
@@ -320,6 +324,7 @@ public class UseIndexesStrategy extends SearchStrategy
return retVal;
}
}
+ i++;
}
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java
b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java
index 393d54b3e83..8ea88b580f6 100644
---
a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java
+++
b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java
@@ -581,8 +581,8 @@ public abstract class DictionaryEncodedColumnMerger<T
extends Comparable<T>> imp
private boolean allNull(Indexed<T> dimValues)
{
- for (int i = 0, size = dimValues.size(); i < size; i++) {
- if (dimValues.get(i) != null) {
+ for (T dimValue : dimValues) {
+ if (dimValue != null) {
return false;
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java
index c3c0c304410..2b999a01b37 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java
@@ -25,6 +25,7 @@ import org.apache.druid.segment.data.Indexed;
import
org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
import javax.annotation.Nullable;
+import java.util.Iterator;
public final class IndexedStringDictionaryEncodedStringValueIndex<TDictionary
extends Indexed<String>>
implements DictionaryEncodedStringValueIndex
@@ -63,6 +64,12 @@ public final class
IndexedStringDictionaryEncodedStringValueIndex<TDictionary ex
return bitmapFactory;
}
+ @Override
+ public Iterator<String> getValueIterator()
+ {
+ return dictionary.iterator();
+ }
+
@Override
public ImmutableBitmap getBitmap(int idx)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
index b60115e4066..9ef6e2cffcc 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
@@ -24,6 +24,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import javax.annotation.Nullable;
+import java.util.Iterator;
/**
* This exposes a 'raw' view into bitmap value indexes for {@link
DictionaryEncodedColumn}. This allows callers
@@ -54,5 +55,10 @@ public interface DictionaryEncodedValueIndex<T>
@Nullable
T getValue(int index);
+ /**
+ * Returns an {@link Iterator} containing all the underlying values of the
dictionary in order
+ */
+ Iterator<T> getValueIterator();
+
BitmapFactory getBitmapFactory();
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
index 8b78c76c343..66835edcb74 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
@@ -352,7 +352,7 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
};
}
- private class NestedFieldDictionaryEncodedStringValueIndex implements
DictionaryEncodedStringValueIndex
+ private final class NestedFieldDictionaryEncodedStringValueIndex implements
DictionaryEncodedStringValueIndex
{
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary =
globalStringDictionarySupplier.get();
@@ -369,14 +369,7 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
@Override
public String getValue(int index)
{
- int globalIndex = localDictionary.get(index);
- if (globalIndex < adjustLongId) {
- return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex));
- } else if (globalIndex < adjustDoubleId) {
- return String.valueOf(longDictionary.get(globalIndex - adjustLongId));
- } else {
- return String.valueOf(doubleDictionary.get(globalIndex -
adjustDoubleId));
- }
+ return getStringValueFromGlobalId(localDictionary.get(index));
}
@Override
@@ -385,11 +378,46 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
return bitmapFactory;
}
+ @Override
+ public Iterator<String> getValueIterator()
+ {
+ final Iterator<Integer> localIterator = localDictionary.iterator();
+ return new Iterator<>()
+ {
+ @Override
+ public boolean hasNext()
+ {
+ return localIterator.hasNext();
+ }
+
+ @Override
+ public String next()
+ {
+ return getStringValueFromGlobalId(localIterator.next());
+ }
+ };
+ }
+
@Override
public ImmutableBitmap getBitmap(int idx)
{
return NestedFieldColumnIndexSupplier.this.getBitmap(idx);
}
+
+ @Nullable
+ private String getStringValueFromGlobalId(int globalIndex)
+ {
+ if (globalIndex == 0) {
+ return null;
+ }
+ if (globalIndex < adjustLongId) {
+ return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex));
+ } else if (globalIndex < adjustDoubleId) {
+ return String.valueOf(longDictionary.get(globalIndex - adjustLongId));
+ } else {
+ return String.valueOf(doubleDictionary.get(globalIndex -
adjustDoubleId));
+ }
+ }
}
private class NestedStringValueSetIndexes implements StringValueSetIndexes
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
index ac871372cda..81826eadb92 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
@@ -34,6 +34,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.BitmapResultFactory;
@@ -635,7 +636,7 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
public String getValue(int index)
{
final Double value = dictionary.get(index);
- return value == null ? null : String.valueOf(value);
+ return Evals.asString(value);
}
@Override
@@ -643,5 +644,25 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
{
return bitmapFactory;
}
+
+ @Override
+ public Iterator<String> getValueIterator()
+ {
+ final Iterator<Double> delegate = dictionary.iterator();
+ return new Iterator<>()
+ {
+ @Override
+ public boolean hasNext()
+ {
+ return delegate.hasNext();
+ }
+
+ @Override
+ public String next()
+ {
+ return Evals.asString(delegate.next());
+ }
+ };
+ }
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
index 03ba8e4b4fd..0818354ed5b 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
@@ -33,6 +33,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.guava.GuavaUtils;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.BitmapResultFactory;
@@ -646,7 +647,7 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
public String getValue(int index)
{
final Long value = dictionary.get(index);
- return value == null ? null : String.valueOf(value);
+ return Evals.asString(value);
}
@Override
@@ -654,5 +655,25 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
{
return bitmapFactory;
}
+
+ @Override
+ public Iterator<String> getValueIterator()
+ {
+ final Iterator<Long> delegate = dictionary.iterator();
+ return new Iterator<>()
+ {
+ @Override
+ public boolean hasNext()
+ {
+ return delegate.hasNext();
+ }
+
+ @Override
+ public String next()
+ {
+ return Evals.asString(delegate.next());
+ }
+ };
+ }
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
index 271473f481e..41b58b04a39 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
@@ -720,6 +720,27 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
return delegate.getBitmapFactory();
}
+ @Override
+ public Iterator<String> getValueIterator()
+ {
+ return new Iterator<>()
+ {
+ int position = 0;
+
+ @Override
+ public boolean hasNext()
+ {
+ return position < idMapping.getValueCardinality();
+ }
+
+ @Override
+ public String next()
+ {
+ return delegate.getValue(idMapping.getReverseId(position++));
+ }
+ };
+ }
+
@Override
public ImmutableBitmap getBitmap(int idx)
{
diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java
b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
index 1986fb1ad9a..d6b8336b167 100644
--- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java
+++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
@@ -86,7 +86,6 @@ import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.ConciseBitmapSerdeFactory;
import org.apache.druid.segment.data.FixedIndexed;
-import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
import org.apache.druid.segment.file.SegmentFileMapperV10;
import org.apache.druid.segment.filter.Filters;
@@ -108,6 +107,7 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.EnumSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -488,21 +488,21 @@ public class DumpSegment extends GuiceRunnable
}
jg.writeEndArray();
- Indexed<ByteBuffer> globalStringDictionary =
nestedDataColumn.getUtf8BytesDictionary();
- Indexed<Long> globalLongDictionary =
nestedDataColumn.getLongDictionary();
- Indexed<Double> globalDoubleDictionary =
nestedDataColumn.getDoubleDictionary();
+ Iterator<ByteBuffer> globalStringIterator =
nestedDataColumn.getUtf8BytesDictionary().iterator();
+ Iterator<Long> globalLongIterator =
nestedDataColumn.getLongDictionary().iterator();
+ Iterator<Double> globalDoubleIterator =
nestedDataColumn.getDoubleDictionary().iterator();
jg.writeFieldName("dictionaries");
jg.writeStartObject();
{
int globalId = 0;
jg.writeFieldName("strings");
jg.writeStartArray();
- for (int i = 0; i < globalStringDictionary.size(); i++,
globalId++) {
+ while (globalStringIterator.hasNext()) {
jg.writeStartObject();
jg.writeFieldName("globalId");
- jg.writeNumber(globalId);
+ jg.writeNumber(globalId++);
jg.writeFieldName("value");
- final ByteBuffer val = globalStringDictionary.get(i);
+ final ByteBuffer val = globalStringIterator.next();
if (val == null) {
jg.writeNull();
} else {
@@ -514,24 +514,24 @@ public class DumpSegment extends GuiceRunnable
jg.writeFieldName("longs");
jg.writeStartArray();
- for (int i = 0; i < globalLongDictionary.size(); i++,
globalId++) {
+ while (globalLongIterator.hasNext()) {
jg.writeStartObject();
jg.writeFieldName("globalId");
- jg.writeNumber(globalId);
+ jg.writeNumber(globalId++);
jg.writeFieldName("value");
- jg.writeNumber(globalLongDictionary.get(i));
+ jg.writeNumber(globalLongIterator.next());
jg.writeEndObject();
}
jg.writeEndArray();
jg.writeFieldName("doubles");
jg.writeStartArray();
- for (int i = 0; i < globalDoubleDictionary.size(); i++,
globalId++) {
+ while (globalDoubleIterator.hasNext()) {
jg.writeStartObject();
jg.writeFieldName("globalId");
- jg.writeNumber(globalId);
+ jg.writeNumber(globalId++);
jg.writeFieldName("value");
- jg.writeNumber(globalDoubleDictionary.get(i));
+ jg.writeNumber(globalDoubleIterator.next());
jg.writeEndObject();
}
jg.writeEndArray();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]