This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 1b5b61ef7f4 add multi-value string object vector matcher and
expression vector object selectors (#17162)
1b5b61ef7f4 is described below
commit 1b5b61ef7f4f7ae20a79f7194078f59635c40fb7
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Sep 25 22:57:29 2024 -0700
add multi-value string object vector matcher and expression vector object
selectors (#17162)
---
.../MultiValueStringObjectVectorValueMatcher.java | 128 +++++++++++++++++++++
.../VectorValueMatcherColumnProcessorFactory.java | 3 +
.../druid/segment/filter/ExpressionFilter.java | 6 +-
.../ExpressionMultiValueDimensionSelector.java | 10 ++
...essionVectorMultiValueStringObjectSelector.java | 81 +++++++++++++
.../segment/virtual/ExpressionVectorSelectors.java | 18 ++-
.../segment/virtual/ExpressionVirtualColumn.java | 2 +-
.../druid/segment/filter/BaseFilterTest.java | 6 +-
.../druid/segment/filter/EqualityFilterTests.java | 94 +++++++++++++++
9 files changed, 341 insertions(+), 7 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringObjectVectorValueMatcher.java
b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringObjectVectorValueMatcher.java
new file mode 100644
index 00000000000..3e6c32efee9
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringObjectVectorValueMatcher.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.query.filter.DruidObjectPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.List;
+import java.util.Objects;
+
+public class MultiValueStringObjectVectorValueMatcher implements
VectorValueMatcherFactory
+{
+ protected final VectorObjectSelector selector;
+
+ public MultiValueStringObjectVectorValueMatcher(final VectorObjectSelector
selector)
+ {
+ this.selector = selector;
+ }
+
+ @Override
+ public VectorValueMatcher makeMatcher(@Nullable String value)
+ {
+ return new BaseVectorValueMatcher(selector)
+ {
+ final VectorMatch match = VectorMatch.wrap(new
int[selector.getMaxVectorSize()]);
+
+ @Override
+ public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean
includeUnknown)
+ {
+ final Object[] vector = selector.getObjectVector();
+ final int[] selection = match.getSelection();
+
+ int numRows = 0;
+
+ for (int i = 0; i < mask.getSelectionSize(); i++) {
+ final int rowNum = mask.getSelection()[i];
+ final Object val = vector[rowNum];
+ if (val instanceof List) {
+ for (Object o : (List) val) {
+ if ((o == null && includeUnknown) || Objects.equals(value, o)) {
+ selection[numRows++] = rowNum;
+ break;
+ }
+ }
+ } else {
+ if ((val == null && includeUnknown) || Objects.equals(value, val))
{
+ selection[numRows++] = rowNum;
+ }
+ }
+ }
+
+ match.setSelectionSize(numRows);
+ return match;
+ }
+ };
+ }
+
+ @Override
+ public VectorValueMatcher makeMatcher(Object matchValue, ColumnType
matchValueType)
+ {
+ final ExprEval<?> eval =
ExprEval.ofType(ExpressionType.fromColumnType(matchValueType), matchValue);
+ final ExprEval<?> castForComparison =
ExprEval.castForEqualityComparison(eval, ExpressionType.STRING);
+ if (castForComparison == null || castForComparison.asString() == null) {
+ return VectorValueMatcher.allFalseObjectMatcher(selector);
+ }
+ return makeMatcher(castForComparison.asString());
+ }
+
+ @Override
+ public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory)
+ {
+ final DruidObjectPredicate<String> predicate =
predicateFactory.makeStringPredicate();
+
+ return new BaseVectorValueMatcher(selector)
+ {
+ final VectorMatch match = VectorMatch.wrap(new
int[selector.getMaxVectorSize()]);
+
+ @Override
+ public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean
includeUnknown)
+ {
+ final Object[] vector = selector.getObjectVector();
+ final int[] selection = match.getSelection();
+
+ int numRows = 0;
+
+ for (int i = 0; i < mask.getSelectionSize(); i++) {
+ final int rowNum = mask.getSelection()[i];
+ Object val = vector[rowNum];
+ if (val instanceof List) {
+ for (Object o : (List) val) {
+ if (predicate.apply((String) o).matches(includeUnknown)) {
+ selection[numRows++] = rowNum;
+ break;
+ }
+ }
+ } else if (predicate.apply((String) val).matches(includeUnknown)) {
+ selection[numRows++] = rowNum;
+ }
+ }
+
+ match.setSelectionSize(numRows);
+ return match;
+ }
+ };
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java
b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java
index 0d16ee24230..d8cdd509e71 100644
---
a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java
+++
b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java
@@ -99,6 +99,9 @@ public class VectorValueMatcherColumnProcessorFactory
implements VectorColumnPro
)
{
if (capabilities.is(ValueType.STRING)) {
+ if (capabilities.hasMultipleValues().isTrue()) {
+ return new MultiValueStringObjectVectorValueMatcher(selector);
+ }
return new StringObjectVectorValueMatcher(selector);
}
return new ObjectVectorValueMatcher(selector);
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index ccd53e96bc2..dcd818489ca 100644
---
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -121,18 +121,18 @@ public class ExpressionFilter implements Filter
case STRING:
return
VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor(
ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(),
- ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr)
+ ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr, null)
).makeMatcher(predicateFactory);
case ARRAY:
return
VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor(
ColumnCapabilitiesImpl.createDefault().setType(ExpressionType.toColumnType(outputType)).setHasNulls(true),
- ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr)
+ ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr, null)
).makeMatcher(predicateFactory);
default:
if (ExpressionType.NESTED_DATA.equals(outputType)) {
return
VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor(
ColumnCapabilitiesImpl.createDefault().setType(ExpressionType.toColumnType(outputType)).setHasNulls(true),
- ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr)
+ ExpressionVectorSelectors.makeVectorObjectSelector(factory,
theExpr, null)
).makeMatcher(predicateFactory);
}
throw new UOE("Vectorized expression matchers not implemented for
type: [%s]", outputType);
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionMultiValueDimensionSelector.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionMultiValueDimensionSelector.java
index 031bc46cc1d..dd70b3566e1 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionMultiValueDimensionSelector.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionMultiValueDimensionSelector.java
@@ -76,10 +76,14 @@ public class ExpressionMultiValueDimensionSelector
implements DimensionSelector
return evaluated.asString();
}
+ @Nullable
List<String> getArrayAsList(ExprEval evaluated)
{
assert evaluated.isArray();
//noinspection ConstantConditions
+ if (evaluated.asArray() == null) {
+ return null;
+ }
return Arrays.stream(evaluated.asArray())
.map(Evals::asString)
.collect(Collectors.toList());
@@ -133,6 +137,9 @@ public class ExpressionMultiValueDimensionSelector
implements DimensionSelector
ExprEval evaluated = getEvaluated();
if (evaluated.isArray()) {
List<String> array = getArrayAsList(evaluated);
+ if (array == null) {
+ return includeUnknown || value == null;
+ }
return array.stream().anyMatch(x -> (includeUnknown && x == null) ||
Objects.equals(x, value));
}
final String rowValue = getValue(evaluated);
@@ -159,6 +166,9 @@ public class ExpressionMultiValueDimensionSelector
implements DimensionSelector
final DruidObjectPredicate<String> predicate =
predicateFactory.makeStringPredicate();
if (evaluated.isArray()) {
List<String> array = getArrayAsList(evaluated);
+ if (array == null) {
+ return predicate.apply(null).matches(includeUnknown);
+ }
return array.stream().anyMatch(x ->
predicate.apply(x).matches(includeUnknown));
}
final String rowValue = getValue(evaluated);
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorMultiValueStringObjectSelector.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorMultiValueStringObjectSelector.java
new file mode 100644
index 00000000000..8c2ce93dba7
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorMultiValueStringObjectSelector.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.virtual;
+
+import com.google.common.base.Preconditions;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+
+import java.util.Arrays;
+
+public class ExpressionVectorMultiValueStringObjectSelector implements
VectorObjectSelector
+{
+ private final Expr.VectorInputBinding bindings;
+ private final ExprVectorProcessor<?> processor;
+
+ @MonotonicNonNull
+ private Object[] cached;
+ private int currentId = ReadableVectorInspector.NULL_ID;
+
+ public ExpressionVectorMultiValueStringObjectSelector(
+ ExprVectorProcessor<?> processor,
+ Expr.VectorInputBinding bindings
+ )
+ {
+ this.processor = Preconditions.checkNotNull(processor, "processor");
+ this.bindings = Preconditions.checkNotNull(bindings, "bindings");
+ this.cached = new Object[bindings.getMaxVectorSize()];
+ }
+
+ @Override
+ public Object[] getObjectVector()
+ {
+ if (bindings.getCurrentVectorId() != currentId) {
+ currentId = bindings.getCurrentVectorId();
+ final Object[] tmp = processor.evalVector(bindings).getObjectVector();
+ for (int i = 0; i < bindings.getCurrentVectorSize(); i++) {
+ Object[] tmpi = (Object[]) tmp[i];
+ if (tmpi == null) {
+ cached[i] = null;
+ } else if (tmpi.length == 1) {
+ cached[i] = tmpi[0];
+ } else {
+ cached[i] = Arrays.asList(tmpi);
+ }
+ }
+ }
+ return cached;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return bindings.getMaxVectorSize();
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return bindings.getCurrentVectorSize();
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
index 8578be228d5..c0776a2356c 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment.virtual;
import com.google.common.base.Preconditions;
import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
@@ -33,6 +34,8 @@ import
org.apache.druid.query.groupby.epinephelinae.vector.GroupByVectorColumnSe
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.Types;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.ConstantVectorSelectors;
import org.apache.druid.segment.vector.ReadableVectorInspector;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
@@ -94,21 +97,32 @@ public class ExpressionVectorSelectors
public static VectorObjectSelector makeVectorObjectSelector(
VectorColumnSelectorFactory factory,
- Expr expression
+ Expr expression,
+ @Nullable ColumnType outputTypeHint
)
{
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
if (plan.isConstant()) {
+ final ExprEval<?> eval =
plan.getExpression().eval(InputBindings.nilBindings());
+ if (Types.is(outputTypeHint, ValueType.STRING) && eval.type().isArray())
{
+ return ConstantVectorSelectors.vectorObjectSelector(
+ factory.getReadableVectorInspector(),
+ ExpressionSelectors.coerceEvalToObjectOrList(eval)
+ );
+ }
return ConstantVectorSelectors.vectorObjectSelector(
factory.getReadableVectorInspector(),
-
plan.getExpression().eval(InputBindings.nilBindings()).valueOrDefault()
+ eval.valueOrDefault()
);
}
final Expr.VectorInputBinding bindings =
createVectorBindings(plan.getAnalysis(), factory);
final ExprVectorProcessor<?> processor =
plan.getExpression().asVectorProcessor(bindings);
+ if (Types.is(outputTypeHint, ValueType.STRING) &&
processor.getOutputType().isArray()) {
+ return new ExpressionVectorMultiValueStringObjectSelector(processor,
bindings);
+ }
return new ExpressionVectorObjectSelector(processor, bindings);
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index e6f4d57e1d5..8bb62128f9d 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -238,7 +238,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
return
factory.makeObjectSelector(parsedExpression.get().getBindingIfIdentifier());
}
- return ExpressionVectorSelectors.makeVectorObjectSelector(factory,
parsedExpression.get());
+ return ExpressionVectorSelectors.makeVectorObjectSelector(factory,
parsedExpression.get(), expression.outputType);
}
@Nullable
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
index 2e3ae0b633f..ed4ff921a9c 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
@@ -163,7 +163,11 @@ public abstract class BaseFilterTest extends
InitializedNullHandlingTest
new NestedFieldVirtualColumn("nested", "$.l0", "nested.l0",
ColumnType.LONG),
new NestedFieldVirtualColumn("nested", "$.arrayLong",
"nested.arrayLong", ColumnType.LONG_ARRAY),
new NestedFieldVirtualColumn("nested", "$.arrayDouble",
"nested.arrayDouble", ColumnType.DOUBLE_ARRAY),
- new NestedFieldVirtualColumn("nested", "$.arrayString",
"nested.arrayString", ColumnType.STRING_ARRAY)
+ new NestedFieldVirtualColumn("nested", "$.arrayString",
"nested.arrayString", ColumnType.STRING_ARRAY),
+ new ExpressionVirtualColumn("arrayLongAsMvd",
"array_to_mv(arrayLong)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("arrayDoubleAsMvd",
"array_to_mv(arrayDouble)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("arrayStringAsMvd",
"array_to_mv(arrayString)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("arrayConstantAsMvd",
"array_to_mv(array(1,2,3))", ColumnType.STRING, TestExprMacroTable.INSTANCE)
)
);
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
index 0259a2c3432..9e29ff266b1 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
@@ -1629,6 +1629,100 @@ public class EqualityFilterTests
: ImmutableList.of("0", "1", "2", "3", "4", "5")
);
}
+
+ @Test
+ public void testArraysAsMvds()
+ {
+ Assume.assumeTrue(canTestArrayColumns());
+ /*
+ dim0 .. arrayString arrayLong arrayDouble
+ "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2,
3.3]
+ "1", .. [], [], [1.1, 2.2,
3.3]
+ "2", .. null, [1L, 2L, 3L], [null]
+ "3", .. ["a", "b", "c"], null, []
+ "4", .. ["c", "d"], [null], [-1.1,
-333.3]
+ "5", .. [null], [123L, 345L], null
+ */
+
+ assertFilterMatches(
+ new EqualityFilter(
+ "arrayStringAsMvd",
+ ColumnType.STRING,
+ "b",
+ null
+ ),
+ ImmutableList.of("0", "3")
+ );
+ assertFilterMatches(
+ NotDimFilter.of(
+ new EqualityFilter(
+ "arrayStringAsMvd",
+ ColumnType.STRING,
+ "b",
+ null
+ )
+ ),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("1", "4")
+ : ImmutableList.of("1", "2", "4", "5")
+ );
+
+ assertFilterMatches(
+ new EqualityFilter(
+ "arrayLongAsMvd",
+ ColumnType.STRING,
+ "2",
+ null
+ ),
+ ImmutableList.of("0", "2")
+ );
+ assertFilterMatches(
+ NotDimFilter.of(
+ new EqualityFilter(
+ "arrayLongAsMvd",
+ ColumnType.STRING,
+ "2",
+ null
+ )
+ ),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("1", "5")
+ : ImmutableList.of("1", "3", "4", "5")
+ );
+
+ assertFilterMatches(
+ new EqualityFilter(
+ "arrayDoubleAsMvd",
+ ColumnType.STRING,
+ "3.3",
+ null
+ ),
+ ImmutableList.of("0", "1")
+ );
+ assertFilterMatches(
+ NotDimFilter.of(
+ new EqualityFilter(
+ "arrayDoubleAsMvd",
+ ColumnType.STRING,
+ "3.3",
+ null
+ )
+ ),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("3", "4")
+ : ImmutableList.of("2", "3", "4", "5")
+ );
+
+ assertFilterMatches(
+ new EqualityFilter(
+ "arrayConstantAsMvd",
+ ColumnType.STRING,
+ "3",
+ null
+ ),
+ ImmutableList.of("0", "1", "2", "3", "4", "5")
+ );
+ }
}
public static class EqualityFilterNonParameterizedTests extends
InitializedNullHandlingTest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]