This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 5ce4aab3b8b update ARRAY_OVERLAP to plan with ArrayContainsElement for
ARRAY columns (#15451)
5ce4aab3b8b is described below
commit 5ce4aab3b8b189b718ee33fb281889e6c4c21c9c
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Nov 29 20:35:20 2023 -0800
update ARRAY_OVERLAP to plan with ArrayContainsElement for ARRAY columns
(#15451)
Updates ARRAY_OVERLAP to use the same ArrayContainsElement filter added in
#15366 when filtering ARRAY typed columns so that it can also use indexes like
ARRAY_CONTAINS.
---
.../benchmark/query/SqlExpressionBenchmark.java | 8 ++-
.../sql/calcite/expression/DruidExpression.java | 5 ++
.../builtin/ArrayContainsOperatorConversion.java | 16 ++---
.../builtin/ArrayOverlapOperatorConversion.java | 71 ++++++++++++++++++----
.../druid/sql/calcite/CalciteArraysQueryTest.java | 21 ++++++-
.../sql/calcite/CalciteNestedDataQueryTest.java | 3 +-
6 files changed, 96 insertions(+), 28 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index 83b3ed531f0..291a0727a1d 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -200,8 +200,9 @@ public class SqlExpressionBenchmark
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2,
SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
// 37: time shift + expr agg (group by), uniform distribution high
cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2,
SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
- // 38: array filtering
- "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\",
100) GROUP BY 1,2"
+ // 38,39: array element filtering
+ "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\",
100) GROUP BY 1,2",
+ "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\",
ARRAY[100, 200]) GROUP BY 1,2"
);
@Param({"5000000"})
@@ -260,7 +261,8 @@ public class SqlExpressionBenchmark
"35",
"36",
"37",
- "38"
+ "38",
+ "39"
})
private String query;
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
index b06f3e402f2..7faac91ab09 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
@@ -366,6 +366,11 @@ public class DruidExpression
return Preconditions.checkNotNull(simpleExtraction);
}
+ public boolean isArray()
+ {
+ return druidType != null && druidType.isArray();
+ }
+
/**
* Get sub {@link DruidExpression} arguments of this expression
*/
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
index c961ed04016..9036d7e406d 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
@@ -100,7 +100,7 @@ public class ArrayContainsOperatorConversion extends
BaseExpressionDimFilterOper
// if the input column is not actually an ARRAY type, but rather an MVD,
we can optimize this into
// selector/equality filters on the individual array elements
if (leftExpr.isSimpleExtraction()
- && !isArray(leftExpr)
+ && !leftExpr.isArray()
&& (plannerContext.isUseBoundsAndSelectors() ||
leftExpr.isDirectColumnAccess())) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
// To convert this expression filter into an And of Selector filters, we
need to extract all array elements.
@@ -142,9 +142,9 @@ public class ArrayContainsOperatorConversion extends
BaseExpressionDimFilterOper
}
}
// if the input is a direct array column, we can use sweet array filter
- if (leftExpr.isDirectColumnAccess() && isArray(leftExpr)) {
+ if (leftExpr.isDirectColumnAccess() && leftExpr.isArray()) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
- // To convert this expression filter into an And of ArrayContainsElement
filters, we need to extract all array
+ // To convert this expression filter into an AND of ArrayContainsElement
filters, we need to extract all array
// elements. For now, we can optimize only when rightExpr is a literal
because there is no way to extract the
// array elements by traversing the Expr. Note that all implementations
of Expr are defined as package-private
// classes in a different package.
@@ -154,6 +154,11 @@ public class ArrayContainsOperatorConversion extends
BaseExpressionDimFilterOper
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
if (exprEval.isArray()) {
final Object[] arrayElements = exprEval.asArray();
+ if (arrayElements.length == 0) {
+ // this isn't likely possible today because array constructor
function does not accept empty argument list
+ // but just in case, return null
+ return null;
+ }
final List<DimFilter> filters = new
ArrayList<>(arrayElements.length);
final ColumnType elementType =
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
for (final Object val : arrayElements) {
@@ -180,9 +185,4 @@ public class ArrayContainsOperatorConversion extends
BaseExpressionDimFilterOper
}
return toExpressionFilter(plannerContext, getDruidFunctionName(),
druidExpressions);
}
-
- private static boolean isArray(final DruidExpression expr)
- {
- return expr.getDruidType() != null && expr.getDruidType().isArray();
- }
}
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
index 24fac69d11d..af5c65d9f84 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
@@ -30,9 +30,12 @@ import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
+import org.apache.druid.query.filter.ArrayContainsElementFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.query.filter.OrDimFilter;
+import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
@@ -41,6 +44,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
import javax.annotation.Nullable;
+import java.util.ArrayList;
import java.util.List;
public class ArrayOverlapOperatorConversion extends
BaseExpressionDimFilterOperatorConversion
@@ -90,29 +94,29 @@ public class ArrayOverlapOperatorConversion extends
BaseExpressionDimFilterOpera
}
// Converts array_overlaps() function into an OR of Selector filters if
possible.
- final DruidExpression leftExpression = druidExpressions.get(0);
- final DruidExpression rightExpression = druidExpressions.get(1);
- final boolean leftSimpleExtractionExpr =
leftExpression.isSimpleExtraction();
- final boolean leftArrayColumn = leftExpression.isDirectColumnAccess() &&
leftExpression.getDruidType() != null &&
leftExpression.getDruidType().isArray();
- final boolean rightSimpleExtractionExpr =
rightExpression.isSimpleExtraction();
- final boolean rightArrayColumn = rightExpression.isDirectColumnAccess() &&
rightExpression.getDruidType() != null &&
rightExpression.getDruidType().isArray();
+ final DruidExpression leftExpr = druidExpressions.get(0);
+ final DruidExpression rightExpr = druidExpressions.get(1);
+ final boolean leftSimpleExtractionExpr = leftExpr.isSimpleExtraction();
+ final boolean rightSimpleExtractionExpr = rightExpr.isSimpleExtraction();
final DruidExpression simpleExtractionExpr;
final DruidExpression complexExpr;
- if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr &&
!(leftArrayColumn || rightArrayColumn)) {
+ if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr) {
if (leftSimpleExtractionExpr) {
- simpleExtractionExpr = leftExpression;
- complexExpr = rightExpression;
+ simpleExtractionExpr = leftExpr;
+ complexExpr = rightExpr;
} else {
- simpleExtractionExpr = rightExpression;
- complexExpr = leftExpression;
+ simpleExtractionExpr = rightExpr;
+ complexExpr = leftExpr;
}
} else {
return toExpressionFilter(plannerContext, getDruidFunctionName(),
druidExpressions);
}
- Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
- if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() ||
simpleExtractionExpr.isDirectColumnAccess())) {
+ final Expr expr =
plannerContext.parseExpression(complexExpr.getExpression());
+ if (expr.isLiteral()
+ && !simpleExtractionExpr.isArray()
+ && (plannerContext.isUseBoundsAndSelectors() ||
simpleExtractionExpr.isDirectColumnAccess())) {
// Evaluate the expression to take out the array elements.
// We can safely pass null if the expression is literal.
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
@@ -149,6 +153,47 @@ public class ArrayOverlapOperatorConversion extends
BaseExpressionDimFilterOpera
);
}
}
+
+ // if the input is a direct array column, we can use sweet array filter
+ if (simpleExtractionExpr.isDirectColumnAccess() &&
simpleExtractionExpr.isArray()) {
+ // To convert this expression filter into an OR of ArrayContainsElement
filters, we need to extract all array
+ // elements.
+ if (expr.isLiteral()) {
+ // Evaluate the expression to get out the array elements.
+ // We can safely pass a nil ObjectBinding if the expression is literal.
+ ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
+ if (exprEval.isArray()) {
+ final Object[] arrayElements = exprEval.asArray();
+ if (arrayElements.length == 0) {
+ // this isn't likely possible today because array constructor
function does not accept empty argument list
+ // but just in case, return null
+ return null;
+ }
+ final List<DimFilter> filters = new
ArrayList<>(arrayElements.length);
+ final ColumnType elementType =
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
+ for (final Object val : arrayElements) {
+ filters.add(
+ new ArrayContainsElementFilter(
+ leftExpr.getSimpleExtraction().getColumn(),
+ elementType,
+ val,
+ null
+ )
+ );
+ }
+
+ return filters.size() == 1 ? filters.get(0) : new
OrDimFilter(filters);
+ } else {
+ return new ArrayContainsElementFilter(
+ leftExpr.getSimpleExtraction().getColumn(),
+ ExpressionType.toColumnType(exprEval.type()),
+ exprEval.valueOrDefault(),
+ null
+ );
+ }
+ }
+ }
+
return toExpressionFilter(plannerContext, getDruidFunctionName(),
druidExpressions);
}
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index 0f6e86edb18..2f27011b056 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -860,7 +860,12 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
-
.filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))"))
+ .filters(
+ or(
+ new ArrayContainsElementFilter("arrayStringNulls",
ColumnType.STRING, "a", null),
+ new ArrayContainsElementFilter("arrayStringNulls",
ColumnType.STRING, "b", null)
+ )
+ )
.columns("arrayStringNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
@@ -886,7 +891,12 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
-
.filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))"))
+ .filters(
+ or(
+ new ArrayContainsElementFilter("arrayLongNulls",
ColumnType.LONG, 1L, null),
+ new ArrayContainsElementFilter("arrayLongNulls",
ColumnType.LONG, 2L, null)
+ )
+ )
.columns("arrayLongNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
@@ -912,7 +922,12 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
newScanQueryBuilder()
.dataSource(DATA_SOURCE_ARRAYS)
.intervals(querySegmentSpec(Filtration.eternity()))
-
.filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))"))
+ .filters(
+ or(
+ new ArrayContainsElementFilter("arrayDoubleNulls",
ColumnType.DOUBLE, 1.1, null),
+ new ArrayContainsElementFilter("arrayDoubleNulls",
ColumnType.DOUBLE, 2.2, null)
+ )
+ )
.columns("arrayDoubleNulls")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(5)
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 9c0677a0f15..22e16f6e7e1 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -1563,7 +1563,8 @@ public class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.setDimFilter(
or(
new
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
-
expressionFilter("array_overlap(\"arrayLongNulls\",array(2,3))")
+ new
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null),
+ new
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 3L, null)
)
)
.setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]