This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ce4aab3b8b update ARRAY_OVERLAP to plan with ArrayContainsElement for 
ARRAY columns (#15451)
5ce4aab3b8b is described below

commit 5ce4aab3b8b189b718ee33fb281889e6c4c21c9c
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Nov 29 20:35:20 2023 -0800

    update ARRAY_OVERLAP to plan with ArrayContainsElement for ARRAY columns 
(#15451)
    
    Updates ARRAY_OVERLAP to use the same ArrayContainsElement filter added in 
#15366 when filtering ARRAY typed columns so that it can also use indexes like 
ARRAY_CONTAINS.
---
 .../benchmark/query/SqlExpressionBenchmark.java    |  8 ++-
 .../sql/calcite/expression/DruidExpression.java    |  5 ++
 .../builtin/ArrayContainsOperatorConversion.java   | 16 ++---
 .../builtin/ArrayOverlapOperatorConversion.java    | 71 ++++++++++++++++++----
 .../druid/sql/calcite/CalciteArraysQueryTest.java  | 21 ++++++-
 .../sql/calcite/CalciteNestedDataQueryTest.java    |  3 +-
 6 files changed, 96 insertions(+), 28 deletions(-)

diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index 83b3ed531f0..291a0727a1d 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -200,8 +200,9 @@ public class SqlExpressionBenchmark
       "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, 
SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
       // 37: time shift + expr agg (group by), uniform distribution high 
cardinality
       "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, 
SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
-      // 38: array filtering
-      "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 
100) GROUP BY 1,2"
+      // 38,39: array element filtering
+      "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 
100) GROUP BY 1,2",
+      "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", 
ARRAY[100, 200]) GROUP BY 1,2"
   );
 
   @Param({"5000000"})
@@ -260,7 +261,8 @@ public class SqlExpressionBenchmark
       "35",
       "36",
       "37",
-      "38"
+      "38",
+      "39"
   })
   private String query;
 
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
index b06f3e402f2..7faac91ab09 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
@@ -366,6 +366,11 @@ public class DruidExpression
     return Preconditions.checkNotNull(simpleExtraction);
   }
 
+  public boolean isArray()
+  {
+    return druidType != null && druidType.isArray();
+  }
+
   /**
    * Get sub {@link DruidExpression} arguments of this expression
    */
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
index c961ed04016..9036d7e406d 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
@@ -100,7 +100,7 @@ public class ArrayContainsOperatorConversion extends 
BaseExpressionDimFilterOper
     // if the input column is not actually an ARRAY type, but rather an MVD, 
we can optimize this into
     // selector/equality filters on the individual array elements
     if (leftExpr.isSimpleExtraction()
-        && !isArray(leftExpr)
+        && !leftExpr.isArray()
         && (plannerContext.isUseBoundsAndSelectors() || 
leftExpr.isDirectColumnAccess())) {
       Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
       // To convert this expression filter into an And of Selector filters, we 
need to extract all array elements.
@@ -142,9 +142,9 @@ public class ArrayContainsOperatorConversion extends 
BaseExpressionDimFilterOper
       }
     }
     // if the input is a direct array column, we can use sweet array filter
-    if (leftExpr.isDirectColumnAccess() && isArray(leftExpr)) {
+    if (leftExpr.isDirectColumnAccess() && leftExpr.isArray()) {
       Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
-      // To convert this expression filter into an And of ArrayContainsElement 
filters, we need to extract all array
+      // To convert this expression filter into an AND of ArrayContainsElement 
filters, we need to extract all array
       // elements. For now, we can optimize only when rightExpr is a literal 
because there is no way to extract the
       // array elements by traversing the Expr. Note that all implementations 
of Expr are defined as package-private
       // classes in a different package.
@@ -154,6 +154,11 @@ public class ArrayContainsOperatorConversion extends 
BaseExpressionDimFilterOper
         ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
         if (exprEval.isArray()) {
           final Object[] arrayElements = exprEval.asArray();
+          if (arrayElements.length == 0) {
+            // this isn't likely possible today because array constructor 
function does not accept empty argument list
+            // but just in case, return null
+            return null;
+          }
           final List<DimFilter> filters = new 
ArrayList<>(arrayElements.length);
           final ColumnType elementType = 
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
           for (final Object val : arrayElements) {
@@ -180,9 +185,4 @@ public class ArrayContainsOperatorConversion extends 
BaseExpressionDimFilterOper
     }
     return toExpressionFilter(plannerContext, getDruidFunctionName(), 
druidExpressions);
   }
-
-  private static boolean isArray(final DruidExpression expr)
-  {
-    return expr.getDruidType() != null && expr.getDruidType().isArray();
-  }
 }
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
index 24fac69d11d..af5c65d9f84 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
@@ -30,9 +30,12 @@ import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExpressionType;
 import org.apache.druid.math.expr.InputBindings;
+import org.apache.druid.query.filter.ArrayContainsElementFilter;
 import org.apache.druid.query.filter.DimFilter;
 import org.apache.druid.query.filter.EqualityFilter;
 import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.query.filter.OrDimFilter;
+import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.sql.calcite.expression.DruidExpression;
 import org.apache.druid.sql.calcite.expression.Expressions;
@@ -41,6 +44,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext;
 import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
 
 import javax.annotation.Nullable;
+import java.util.ArrayList;
 import java.util.List;
 
 public class ArrayOverlapOperatorConversion extends 
BaseExpressionDimFilterOperatorConversion
@@ -90,29 +94,29 @@ public class ArrayOverlapOperatorConversion extends 
BaseExpressionDimFilterOpera
     }
 
     // Converts array_overlaps() function into an OR of Selector filters if 
possible.
-    final DruidExpression leftExpression = druidExpressions.get(0);
-    final DruidExpression rightExpression = druidExpressions.get(1);
-    final boolean leftSimpleExtractionExpr = 
leftExpression.isSimpleExtraction();
-    final boolean leftArrayColumn = leftExpression.isDirectColumnAccess() && 
leftExpression.getDruidType() != null && 
leftExpression.getDruidType().isArray();
-    final boolean rightSimpleExtractionExpr = 
rightExpression.isSimpleExtraction();
-    final boolean rightArrayColumn = rightExpression.isDirectColumnAccess() && 
rightExpression.getDruidType() != null && 
rightExpression.getDruidType().isArray();
+    final DruidExpression leftExpr = druidExpressions.get(0);
+    final DruidExpression rightExpr = druidExpressions.get(1);
+    final boolean leftSimpleExtractionExpr = leftExpr.isSimpleExtraction();
+    final boolean rightSimpleExtractionExpr = rightExpr.isSimpleExtraction();
     final DruidExpression simpleExtractionExpr;
     final DruidExpression complexExpr;
 
-    if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr && 
!(leftArrayColumn || rightArrayColumn)) {
+    if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr) {
       if (leftSimpleExtractionExpr) {
-        simpleExtractionExpr = leftExpression;
-        complexExpr = rightExpression;
+        simpleExtractionExpr = leftExpr;
+        complexExpr = rightExpr;
       } else {
-        simpleExtractionExpr = rightExpression;
-        complexExpr = leftExpression;
+        simpleExtractionExpr = rightExpr;
+        complexExpr = leftExpr;
       }
     } else {
       return toExpressionFilter(plannerContext, getDruidFunctionName(), 
druidExpressions);
     }
 
-    Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
-    if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || 
simpleExtractionExpr.isDirectColumnAccess())) {
+    final Expr expr = 
plannerContext.parseExpression(complexExpr.getExpression());
+    if (expr.isLiteral()
+        && !simpleExtractionExpr.isArray()
+        && (plannerContext.isUseBoundsAndSelectors() || 
simpleExtractionExpr.isDirectColumnAccess())) {
       // Evaluate the expression to take out the array elements.
       // We can safely pass null if the expression is literal.
       ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
@@ -149,6 +153,47 @@ public class ArrayOverlapOperatorConversion extends 
BaseExpressionDimFilterOpera
         );
       }
     }
+
+    // if the input is a direct array column, we can use sweet array filter
+    if (simpleExtractionExpr.isDirectColumnAccess() && 
simpleExtractionExpr.isArray()) {
+      // To convert this expression filter into an OR of ArrayContainsElement 
filters, we need to extract all array
+      // elements.
+      if (expr.isLiteral()) {
+        // Evaluate the expression to get out the array elements.
+        // We can safely pass a nil ObjectBinding if the expression is literal.
+        ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
+        if (exprEval.isArray()) {
+          final Object[] arrayElements = exprEval.asArray();
+          if (arrayElements.length == 0) {
+            // this isn't likely possible today because array constructor 
function does not accept empty argument list
+            // but just in case, return null
+            return null;
+          }
+          final List<DimFilter> filters = new 
ArrayList<>(arrayElements.length);
+          final ColumnType elementType = 
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
+          for (final Object val : arrayElements) {
+            filters.add(
+                new ArrayContainsElementFilter(
+                    leftExpr.getSimpleExtraction().getColumn(),
+                    elementType,
+                    val,
+                    null
+                )
+            );
+          }
+
+          return filters.size() == 1 ? filters.get(0) : new 
OrDimFilter(filters);
+        } else {
+          return new ArrayContainsElementFilter(
+              leftExpr.getSimpleExtraction().getColumn(),
+              ExpressionType.toColumnType(exprEval.type()),
+              exprEval.valueOrDefault(),
+              null
+          );
+        }
+      }
+    }
+
     return toExpressionFilter(plannerContext, getDruidFunctionName(), 
druidExpressions);
   }
 }
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index 0f6e86edb18..2f27011b056 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -860,7 +860,12 @@ public class CalciteArraysQueryTest extends 
BaseCalciteQueryTest
             newScanQueryBuilder()
                 .dataSource(DATA_SOURCE_ARRAYS)
                 .intervals(querySegmentSpec(Filtration.eternity()))
-                
.filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))"))
+                .filters(
+                    or(
+                        new ArrayContainsElementFilter("arrayStringNulls", 
ColumnType.STRING, "a", null),
+                        new ArrayContainsElementFilter("arrayStringNulls", 
ColumnType.STRING, "b", null)
+                    )
+                )
                 .columns("arrayStringNulls")
                 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                 .limit(5)
@@ -886,7 +891,12 @@ public class CalciteArraysQueryTest extends 
BaseCalciteQueryTest
             newScanQueryBuilder()
                 .dataSource(DATA_SOURCE_ARRAYS)
                 .intervals(querySegmentSpec(Filtration.eternity()))
-                
.filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))"))
+                .filters(
+                    or(
+                        new ArrayContainsElementFilter("arrayLongNulls", 
ColumnType.LONG, 1L, null),
+                        new ArrayContainsElementFilter("arrayLongNulls", 
ColumnType.LONG, 2L, null)
+                    )
+                )
                 .columns("arrayLongNulls")
                 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                 .limit(5)
@@ -912,7 +922,12 @@ public class CalciteArraysQueryTest extends 
BaseCalciteQueryTest
             newScanQueryBuilder()
                 .dataSource(DATA_SOURCE_ARRAYS)
                 .intervals(querySegmentSpec(Filtration.eternity()))
-                
.filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))"))
+                .filters(
+                    or(
+                        new ArrayContainsElementFilter("arrayDoubleNulls", 
ColumnType.DOUBLE, 1.1, null),
+                        new ArrayContainsElementFilter("arrayDoubleNulls", 
ColumnType.DOUBLE, 2.2, null)
+                    )
+                )
                 .columns("arrayDoubleNulls")
                 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                 .limit(5)
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 9c0677a0f15..22e16f6e7e1 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -1563,7 +1563,8 @@ public class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                             .setDimFilter(
                                 or(
                                     new 
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
-                                    
expressionFilter("array_overlap(\"arrayLongNulls\",array(2,3))")
+                                    new 
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null),
+                                    new 
ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 3L, null)
                                 )
                             )
                             .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to