This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 9bb25e6de92 fix OR filter partial index value matcher on cursor reset
(#18029)
9bb25e6de92 is described below
commit 9bb25e6de92a0597ad3616c784f12bc25a583337
Author: Clint Wylie <[email protected]>
AuthorDate: Thu May 22 12:36:55 2025 -0700
fix OR filter partial index value matcher on cursor reset (#18029)
* fix OR filter partial index value matcher on cursor reset
* oops, missed a filter tuning
* fix reset condition check for partial or index matchers, fix
RowBasedColumnValueSelector by removing ignored marking since it could
incorrectly eliminate columns that had some mvd rows and some number rows
(uncommon, but im not sure it is technically illegal)
* fixes
* better variable name
* more tests
---
.../druid/segment/filter/ExpressionFilter.java | 32 ++++------
.../org/apache/druid/segment/filter/OrFilter.java | 71 ++++++++++++++++------
.../RowBasedExpressionColumnValueSelector.java | 24 ++------
.../druid/segment/filter/BaseFilterTest.java | 25 ++++++++
.../druid/segment/filter/ExpressionFilterTest.java | 17 ++++++
.../apache/druid/segment/filter/OrFilterTest.java | 40 ++++++++++++
6 files changed, 150 insertions(+), 59 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index 7d1424d76da..d4f52fa8ead 100644
---
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -154,39 +154,31 @@ public class ExpressionFilter implements Filter
}
if (eval.type().isArray()) {
+ final Object[] result = eval.asArray();
+ if (result == null) {
+ // if value was null and includeUnknown was true we would have
already returned before getting here so
+ // is safe to return false
+ return false;
+ }
switch (eval.elementType().getType()) {
case LONG:
- final Object[] lResult = eval.asArray();
- if (lResult == null) {
- return false;
- }
if (includeUnknown) {
- return Arrays.stream(lResult).anyMatch(o -> o == null ||
Evals.asBoolean((long) o));
+ return Arrays.stream(result).anyMatch(o -> o == null ||
Evals.asBoolean((long) o));
}
- return
Arrays.stream(lResult).filter(Objects::nonNull).anyMatch(o ->
Evals.asBoolean((long) o));
+ return Arrays.stream(result).filter(Objects::nonNull).anyMatch(o
-> Evals.asBoolean((long) o));
case STRING:
- final Object[] sResult = eval.asArray();
- if (sResult == null) {
- return false;
- }
-
if (includeUnknown) {
- return Arrays.stream(sResult).anyMatch(o -> o == null ||
Evals.asBoolean((String) o));
+ return Arrays.stream(result).anyMatch(o -> o == null ||
Evals.asBoolean((String) o));
}
- return Arrays.stream(sResult).anyMatch(o ->
Evals.asBoolean((String) o));
+ return Arrays.stream(result).anyMatch(o ->
Evals.asBoolean((String) o));
case DOUBLE:
- final Object[] dResult = eval.asArray();
- if (dResult == null) {
- return false;
- }
-
if (includeUnknown) {
- return Arrays.stream(dResult).anyMatch(o -> o == null ||
Evals.asBoolean((double) o));
+ return Arrays.stream(result).anyMatch(o -> o == null ||
Evals.asBoolean((double) o));
}
- return
Arrays.stream(dResult).filter(Objects::nonNull).anyMatch(o ->
Evals.asBoolean((double) o));
+ return Arrays.stream(result).filter(Objects::nonNull).anyMatch(o
-> Evals.asBoolean((double) o));
}
}
return eval.asBoolean();
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
index 8363fd29c9f..ebf60288c9e 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
@@ -296,21 +296,29 @@ public class OrFilter implements BooleanFilter
if (descending) {
- final IntIterator iter =
BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
+ final IntIterator initialIterator =
BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
- if (!iter.hasNext()) {
+ if (!initialIterator.hasNext()) {
return ValueMatchers.allFalse();
}
return new ValueMatcher()
{
int iterOffset = Integer.MAX_VALUE;
+ int previousOffset = Integer.MAX_VALUE;
+ IntIterator iterator = initialIterator;
@Override
public boolean matches(boolean includeUnknown)
{
int currentOffset = offset.getOffset();
- while (iterOffset > currentOffset && iter.hasNext()) {
- iterOffset = iter.next();
+ // check if the cursor was reset, and reset iterator if so
+ if (currentOffset >= previousOffset) {
+ iterOffset = Integer.MAX_VALUE;
+ iterator = BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
+ }
+ previousOffset = currentOffset;
+ while (iterOffset > currentOffset && iterator.hasNext()) {
+ iterOffset = iterator.next();
}
return iterOffset == currentOffset;
@@ -320,26 +328,34 @@ public class OrFilter implements BooleanFilter
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("offset", offset);
- inspector.visit("iter", iter);
+ inspector.visit("iter", iterator);
}
};
} else {
- final PeekableIntIterator peekableIterator =
rowBitmap.peekableIterator();
+ final PeekableIntIterator initialIterator = rowBitmap.peekableIterator();
- if (!peekableIterator.hasNext()) {
+ if (!initialIterator.hasNext()) {
return ValueMatchers.allFalse();
}
return new ValueMatcher()
{
int iterOffset = -1;
+ int previousOffset = -1;
+ PeekableIntIterator iterator = initialIterator;
@Override
public boolean matches(boolean includeUnknown)
{
int currentOffset = offset.getOffset();
- peekableIterator.advanceIfNeeded(currentOffset);
- if (peekableIterator.hasNext()) {
- iterOffset = peekableIterator.peekNext();
+ // check if the cursor was reset, and reset iterator if so
+ if (currentOffset <= previousOffset) {
+ iterOffset = -1;
+ iterator = rowBitmap.peekableIterator();
+ }
+ previousOffset = currentOffset;
+ iterator.advanceIfNeeded(currentOffset);
+ if (iterator.hasNext()) {
+ iterOffset = iterator.peekNext();
}
return iterOffset == currentOffset;
@@ -349,7 +365,7 @@ public class OrFilter implements BooleanFilter
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("offset", offset);
- inspector.visit("peekableIterator", peekableIterator);
+ inspector.visit("peekableIterator", iterator);
}
};
}
@@ -360,8 +376,8 @@ public class OrFilter implements BooleanFilter
final ImmutableBitmap bitmap
)
{
- final PeekableIntIterator peekableIntIterator = bitmap.peekableIterator();
- if (!peekableIntIterator.hasNext()) {
+ final PeekableIntIterator initialIterator = bitmap.peekableIterator();
+ if (!initialIterator.hasNext()) {
return BooleanVectorValueMatcher.of(vectorOffset,
ConstantMatcherType.ALL_FALSE);
}
@@ -369,19 +385,29 @@ public class OrFilter implements BooleanFilter
{
final VectorMatch match = VectorMatch.wrap(new
int[vectorOffset.getMaxVectorSize()]);
int iterOffset = -1;
+ int previousStartOffset = -1;
+ PeekableIntIterator iterator = initialIterator;
@Override
public ReadableVectorMatch match(ReadableVectorMatch mask, boolean
includeUnknown)
{
final int[] selection = match.getSelection();
+
if (vectorOffset.isContiguous()) {
+ final int startOffset = vectorOffset.getStartOffset();
+ // check if the cursor was reset, and reset iterator if so
+ if (startOffset <= previousStartOffset) {
+ iterOffset = -1;
+ iterator = bitmap.peekableIterator();
+ }
+ previousStartOffset = startOffset;
int numRows = 0;
for (int i = 0; i < mask.getSelectionSize(); i++) {
final int maskNum = mask.getSelection()[i];
- final int rowNum = vectorOffset.getStartOffset() + maskNum;
- peekableIntIterator.advanceIfNeeded(rowNum);
- if (peekableIntIterator.hasNext()) {
- iterOffset = peekableIntIterator.peekNext();
+ final int rowNum = startOffset + maskNum;
+ iterator.advanceIfNeeded(rowNum);
+ if (iterator.hasNext()) {
+ iterOffset = iterator.peekNext();
if (iterOffset == rowNum) {
selection[numRows++] = maskNum;
}
@@ -391,13 +417,18 @@ public class OrFilter implements BooleanFilter
return match;
} else {
final int[] currentOffsets = vectorOffset.getOffsets();
+ if (getCurrentVectorSize() > 0 && currentOffsets[0] <=
previousStartOffset) {
+ iterOffset = -1;
+ iterator = bitmap.peekableIterator();
+ }
+ previousStartOffset = currentOffsets[0];
int numRows = 0;
for (int i = 0; i < mask.getSelectionSize(); i++) {
final int maskNum = mask.getSelection()[i];
final int rowNum = currentOffsets[mask.getSelection()[i]];
- peekableIntIterator.advanceIfNeeded(rowNum);
- if (peekableIntIterator.hasNext()) {
- iterOffset = peekableIntIterator.peekNext();
+ iterator.advanceIfNeeded(rowNum);
+ if (iterator.hasNext()) {
+ iterOffset = iterator.peekNext();
if (iterOffset == rowNum) {
selection[numRows++] = maskNum;
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
index 86e4aedb988..085510aaccf 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
@@ -19,6 +19,7 @@
package org.apache.druid.segment.virtual;
+import com.google.common.collect.Lists;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import org.apache.druid.math.expr.Expr;
@@ -27,10 +28,7 @@ import org.apache.druid.math.expr.Parser;
import org.apache.druid.segment.RowIdSupplier;
import javax.annotation.Nullable;
-import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -46,7 +44,6 @@ public class RowBasedExpressionColumnValueSelector extends
BaseExpressionColumnV
private final Expr expression;
private final List<String> unknownColumns;
private final Expr.BindingAnalysis baseBindingAnalysis;
- private final Set<String> ignoredColumns;
private final Int2ObjectMap<Expr> transformedCache;
public RowBasedExpressionColumnValueSelector(
@@ -63,7 +60,6 @@ public class RowBasedExpressionColumnValueSelector extends
BaseExpressionColumnV
.filter(x ->
!plan.getAnalysis().getArrayBindings().contains(x))
.collect(Collectors.toList());
this.baseBindingAnalysis = plan.getAnalysis();
- this.ignoredColumns = new HashSet<>();
this.transformedCache = new Int2ObjectArrayMap<>(unknownColumns.size());
}
@@ -71,7 +67,7 @@ public class RowBasedExpressionColumnValueSelector extends
BaseExpressionColumnV
protected ExprEval<?> eval()
{
// check to find any arrays for this row
- List<String> arrayBindings = new ArrayList<>();
+ final List<String> arrayBindings =
Lists.newArrayListWithCapacity(unknownColumns.size());
for (String unknownColumn : unknownColumns) {
if (isBindingArray(unknownColumn)) {
@@ -79,20 +75,14 @@ public class RowBasedExpressionColumnValueSelector extends
BaseExpressionColumnV
}
}
- // eliminate anything that will never be an array
- if (ignoredColumns.size() > 0) {
- unknownColumns.removeAll(ignoredColumns);
- ignoredColumns.clear();
- }
-
// if there are arrays, we need to transform the expression to one that
applies each value of the array to the
// base expression, we keep a cache of transformed expressions to minimize
extra work
- if (arrayBindings.size() > 0) {
+ if (!arrayBindings.isEmpty()) {
final int key = arrayBindings.hashCode();
if (transformedCache.containsKey(key)) {
return transformedCache.get(key).eval(bindings);
}
- Expr transformed = Parser.applyUnappliedBindings(expression,
baseBindingAnalysis, arrayBindings);
+ final Expr transformed = Parser.applyUnappliedBindings(expression,
baseBindingAnalysis, arrayBindings);
transformedCache.put(key, transformed);
return transformed.eval(bindings);
}
@@ -108,11 +98,7 @@ public class RowBasedExpressionColumnValueSelector extends
BaseExpressionColumnV
{
Object binding = bindings.get(x);
if (binding != null) {
- if (binding instanceof Object[] && ((Object[]) binding).length > 0) {
- return true;
- } else if (binding instanceof Number) {
- ignoredColumns.add(x);
- }
+ return binding instanceof Object[] && ((Object[]) binding).length > 0;
}
return false;
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
index fa73bf7e983..7cbc9cdc29a 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
@@ -732,6 +732,11 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
return false;
}
+ protected boolean hasTypeInformation()
+ {
+ return !testName.contains("rowBasedWithoutTypeSignature");
+ }
+
protected boolean canTestArrayColumns()
{
if (testName.contains("frame (columnar)") ||
testName.contains("rowBasedWithoutTypeSignature")) {
@@ -805,6 +810,12 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
final List<String> values = new ArrayList<>();
+ while (!cursor.isDone()) {
+ IndexedInts row = selector.getRow();
+ Preconditions.checkState(row.size() == 1);
+ cursor.advance();
+ }
+ cursor.reset();
while (!cursor.isDone()) {
IndexedInts row = selector.getRow();
Preconditions.checkState(row.size() == 1);
@@ -916,6 +927,12 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
final List<String> values = new ArrayList<>();
+ while (!cursor.isDone()) {
+ IndexedInts row = selector.getRow();
+ Preconditions.checkState(row.size() == 1);
+ cursor.advance();
+ }
+ cursor.reset();
while (!cursor.isDone()) {
IndexedInts row = selector.getRow();
Preconditions.checkState(row.size() == 1);
@@ -976,6 +993,10 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
final List<String> values = new ArrayList<>();
+ while (!cursor.isDone()) {
+ cursor.advance();
+ }
+ cursor.reset();
while (!cursor.isDone()) {
final int[] rowVector = selector.getRowVector();
for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
@@ -1001,6 +1022,10 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
final List<String> values = new ArrayList<>();
+ while (!cursor.isDone()) {
+ cursor.advance();
+ }
+ cursor.reset();
while (!cursor.isDone()) {
final int[] rowVector = selector.getRowVector();
for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
index 5509af29694..8e32a2d8fbb 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
@@ -176,6 +176,23 @@ public class ExpressionFilterTest extends BaseFilterTest
assertFilterMatchesSkipVectorize(edf("dim4 == ''"), ImmutableList.of("2"));
// AS per SQL standard null == null returns false.
assertFilterMatchesSkipVectorize(edf("dim4 == null"), ImmutableList.of());
+ if (hasTypeInformation()) {
+ assertFilterMatchesSkipVectorize(edf("isnull(dim4)"),
ImmutableList.of("1", "6", "7", "8"));
+ assertFilterMatchesSkipVectorize(
+ NotDimFilter.of(edf("isnull(dim4)")),
+ ImmutableList.of("0", "2", "3", "4", "5", "9")
+ );
+ assertFilterMatchesSkipVectorize(edf("cast(dim4, 'LONG')"),
ImmutableList.of("0", "3", "4", "5", "9"));
+ assertFilterMatchesSkipVectorize(NotDimFilter.of(edf("cast(dim4,
'LONG')")), ImmutableList.of());
+ assertFilterMatchesSkipVectorize(edf("cast(dim4, 'STRING')"),
ImmutableList.of());
+ assertFilterMatchesSkipVectorize(
+ NotDimFilter.of(edf("cast(dim4, 'STRING')")),
+ ImmutableList.of("0", "2", "3", "4", "5", "9")
+ );
+ assertFilterMatchesSkipVectorize(edf("cast(dim4, 'DOUBLE')"),
ImmutableList.of("0", "3", "4", "5", "9"));
+ assertFilterMatchesSkipVectorize(NotDimFilter.of(edf("cast(dim4,
'DOUBLE')")), ImmutableList.of());
+ }
+
assertFilterMatchesSkipVectorize(edf("dim4 == '1'"),
ImmutableList.of("0"));
assertFilterMatchesSkipVectorize(edf("dim4 == '3'"),
ImmutableList.of("3"));
assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4",
"5"));
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
index 60da4f0c12a..dfc3d1f2a1f 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
@@ -32,6 +32,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.query.filter.AndDimFilter;
+import org.apache.druid.query.filter.FilterTuning;
import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.filter.OrDimFilter;
@@ -238,6 +239,31 @@ public class OrFilterTest extends BaseFilterTest
);
}
+ @Test
+ public void testTwoFilterFirstMatchesSomeSecondMatchesNonePartialIndex()
+ {
+ assertFilterMatches(
+ new OrDimFilter(
+ ImmutableList.of(
+ new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"), null),
+ new SelectorDimFilter("dim1", "7", null, new
FilterTuning(false, null, null))
+ )
+ ),
+ ImmutableList.of("1", "2", "3")
+ );
+ assertFilterMatches(
+ NotDimFilter.of(
+ new OrDimFilter(
+ ImmutableList.of(
+ new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"),
null),
+ new SelectorDimFilter("dim1", "7", null, new
FilterTuning(false, null, null))
+ )
+ )
+ ),
+ ImmutableList.of("0", "4", "5")
+ );
+ }
+
@Test
public void testTwoFilterFirstMatchesNoneSecondMatchesSome()
{
@@ -252,6 +278,20 @@ public class OrFilterTest extends BaseFilterTest
);
}
+ @Test
+ public void testTwoFilterFirstMatchesNoneSecondMatchesSomePartialIndex()
+ {
+ assertFilterMatches(
+ new OrDimFilter(
+ ImmutableList.of(
+ new SelectorDimFilter("dim1", "7", null, new
FilterTuning(false, null, null)),
+ new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"), null)
+ )
+ ),
+ ImmutableList.of("1", "2", "3")
+ );
+ }
+
@Test
public void testTwoFilterFirstMatchesNoneSecondMatchesNone()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]