This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new cf6a9204f8b feat(common): add ARRAYHASANY scalar UDF for arrays 
(int/long/float/double/string)\n\n- New PinotScalarFunction: 
ArrayHasAnyScalarFunction (ARRAYHASANY)\n- Type-dispatched implementation via 
FunctionInfo map\n- Integration tests: ArrayTest adds literals, column filters, 
and same-column self-overlap checks\n- Minor planner wiring update in 
PinotOperatorTable (#17156)
cf6a9204f8b is described below

commit cf6a9204f8bdb5ed941f35ad6d7dc78a7b27d57a
Author: Xiang Fu <[email protected]>
AuthorDate: Fri Nov 7 12:25:03 2025 -0800

    feat(common): add ARRAYHASANY scalar UDF for arrays (int/long/float/double/string)

    - New PinotScalarFunction: ArrayHasAnyScalarFunction (ARRAYHASANY)
    - Type-dispatched implementation via FunctionInfo map
    - Integration tests: ArrayTest adds literals, column filters, and same-column self-overlap checks
    - Minor planner wiring update in PinotOperatorTable (#17156)
---
 .../scalar/array/ArraysOverlapScalarFunction.java  | 181 +++++++++++++++++++++
 .../pinot/integration/tests/custom/ArrayTest.java  |  86 ++++++++++
 2 files changed, 267 insertions(+)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java
new file mode 100644
index 00000000000..438cad4ae4e
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar.array;
+
+import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
+import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
+import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+import org.apache.pinot.common.function.FunctionInfo;
+import org.apache.pinot.common.function.PinotScalarFunction;
+import org.apache.pinot.common.function.sql.PinotSqlFunction;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.spi.annotations.ScalarFunction;
+
+
+/**
+ * Scalar function {@code ARRAYS_OVERLAP} (alias {@code ARRAYSOVERLAP}) that reports whether two arrays of the same
+ * element type have at least one element in common.
+ *
+ * <p>Typed overloads of {@code arraysOverlap} exist for {@code int[]}, {@code long[]}, {@code float[]},
+ * {@code double[]} and {@code String[]}. Each overload is registered under its array
+ * {@link DataSchema.ColumnDataType} and looked up in {@link #getFunctionInfo(DataSchema.ColumnDataType[])}; both
+ * arguments must have exactly the same array type, otherwise no implementation is returned.
+ *
+ * <p>Membership is tested with fastutil hash sets, so element equality is whatever those sets implement.
+ * NOTE(review): for FLOAT/DOUBLE this is presumably bit-pattern equality (NaN matches NaN, 0.0 and -0.0 differ) --
+ * confirm against the fastutil documentation before relying on it.
+ */
+@ScalarFunction(names = {ArraysOverlapScalarFunction.FUNCTION_NAME, ArraysOverlapScalarFunction.FUNCTION_ALIAS})
+public class ArraysOverlapScalarFunction implements PinotScalarFunction {
+
+  /** Canonical name of the function. */
+  public static final String FUNCTION_NAME = "ARRAYS_OVERLAP";
+
+  /** Alias of {@link #FUNCTION_NAME} without the underscore. */
+  public static final String FUNCTION_ALIAS = "ARRAYSOVERLAP";
+
+  /** Name shared by all typed public overloads; used for the reflective lookup below. */
+  private static final String METHOD_NAME = "arraysOverlap";
+
+  private static final Set<String> NAMES = Set.of(FUNCTION_NAME, FUNCTION_ALIAS);
+
+  /** Typed implementation to use for each supported array column type. */
+  private static final Map<DataSchema.ColumnDataType, FunctionInfo> TYPE_FUNCTION_INFO_MAP =
+      new EnumMap<>(DataSchema.ColumnDataType.class);
+
+  static {
+    register(DataSchema.ColumnDataType.INT_ARRAY, int[].class);
+    register(DataSchema.ColumnDataType.LONG_ARRAY, long[].class);
+    register(DataSchema.ColumnDataType.FLOAT_ARRAY, float[].class);
+    register(DataSchema.ColumnDataType.DOUBLE_ARRAY, double[].class);
+    register(DataSchema.ColumnDataType.STRING_ARRAY, String[].class);
+  }
+
+  /**
+   * Maps {@code arrayType} to the {@code arraysOverlap(javaArrayClass, javaArrayClass)} overload of this class.
+   *
+   * @throws IllegalStateException if this class declares no such public overload
+   */
+  private static void register(DataSchema.ColumnDataType arrayType, Class<?> javaArrayClass) {
+    try {
+      // NOTE(review): the trailing 'false' is presumably FunctionInfo's nullable-parameters flag -- confirm.
+      TYPE_FUNCTION_INFO_MAP.put(arrayType, new FunctionInfo(
+          ArraysOverlapScalarFunction.class.getMethod(METHOD_NAME, javaArrayClass, javaArrayClass),
+          ArraysOverlapScalarFunction.class, false));
+    } catch (NoSuchMethodException e) {
+      throw new IllegalStateException("Missing " + METHOD_NAME + " overload for " + arrayType, e);
+    }
+  }
+
+  @Override
+  public String getName() {
+    return FUNCTION_NAME;
+  }
+
+  @Override
+  public Set<String> getNames() {
+    return NAMES;
+  }
+
+  /**
+   * Builds the SQL-level description of the function: name {@code ARRAYS_OVERLAP}, BOOLEAN return type and two
+   * operands of the ARRAY type family.
+   */
+  @Nullable
+  @Override
+  public PinotSqlFunction toPinotSqlFunction() {
+    return new PinotSqlFunction(FUNCTION_NAME, ReturnTypes.BOOLEAN,
+        OperandTypes.family(List.of(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)));
+  }
+
+  /**
+   * Picks the typed implementation for the given argument types.
+   *
+   * @return the registered implementation, or {@code null} when there are not exactly two arguments, the two types
+   *     differ, or the type has no registered overload
+   */
+  @Nullable
+  @Override
+  public FunctionInfo getFunctionInfo(DataSchema.ColumnDataType[] argumentTypes) {
+    if (argumentTypes.length != 2 || argumentTypes[0] != argumentTypes[1]) {
+      return null;
+    }
+    return TYPE_FUNCTION_INFO_MAP.get(argumentTypes[0]);
+  }
+
+  /**
+   * Picks an implementation when only the argument count is known.
+   *
+   * @return the STRING_ARRAY implementation for two arguments, {@code null} for any other count
+   */
+  @Nullable
+  @Override
+  public FunctionInfo getFunctionInfo(int numArguments) {
+    if (numArguments != 2) {
+      return null;
+    }
+    // Fall back to string
+    return getFunctionInfo(new DataSchema.ColumnDataType[]{
+        DataSchema.ColumnDataType.STRING_ARRAY,
+        DataSchema.ColumnDataType.STRING_ARRAY
+    });
+  }
+
+  /** Hashes {@code small} and probes it with every value of {@code large}. */
+  private static boolean overlapInts(int[] small, int[] large) {
+    if (small.length == 0) {
+      // Nothing can match an empty array; skip building the set.
+      return false;
+    }
+    IntOpenHashSet lookup = new IntOpenHashSet(small);
+    for (int candidate : large) {
+      if (lookup.contains(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Hashes {@code small} and probes it with every value of {@code large}. */
+  private static boolean overlapLongs(long[] small, long[] large) {
+    if (small.length == 0) {
+      // Nothing can match an empty array; skip building the set.
+      return false;
+    }
+    LongOpenHashSet lookup = new LongOpenHashSet(small);
+    for (long candidate : large) {
+      if (lookup.contains(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Hashes {@code small} and probes it with every value of {@code large}. */
+  private static boolean overlapFloats(float[] small, float[] large) {
+    if (small.length == 0) {
+      // Nothing can match an empty array; skip building the set.
+      return false;
+    }
+    FloatOpenHashSet lookup = new FloatOpenHashSet(small);
+    for (float candidate : large) {
+      if (lookup.contains(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Hashes {@code small} and probes it with every value of {@code large}. */
+  private static boolean overlapDoubles(double[] small, double[] large) {
+    if (small.length == 0) {
+      // Nothing can match an empty array; skip building the set.
+      return false;
+    }
+    DoubleOpenHashSet lookup = new DoubleOpenHashSet(small);
+    for (double candidate : large) {
+      if (lookup.contains(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Hashes {@code small} and probes it with every value of {@code large}. */
+  private static boolean overlapStrings(String[] small, String[] large) {
+    if (small.length == 0) {
+      // Nothing can match an empty array; skip building the set.
+      return false;
+    }
+    ObjectOpenHashSet<String> lookup = new ObjectOpenHashSet<>(small);
+    for (String candidate : large) {
+      if (lookup.contains(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns {@code true} if {@code array1} and {@code array2} share at least one value.
+   */
+  public static boolean arraysOverlap(int[] array1, int[] array2) {
+    // Hash the shorter array so the lookup set stays as small as possible.
+    return array1.length <= array2.length ? overlapInts(array1, array2) : overlapInts(array2, array1);
+  }
+
+  /**
+   * Returns {@code true} if {@code array1} and {@code array2} share at least one value.
+   */
+  public static boolean arraysOverlap(long[] array1, long[] array2) {
+    // Hash the shorter array so the lookup set stays as small as possible.
+    return array1.length <= array2.length ? overlapLongs(array1, array2) : overlapLongs(array2, array1);
+  }
+
+  /**
+   * Returns {@code true} if {@code array1} and {@code array2} share at least one value.
+   */
+  public static boolean arraysOverlap(float[] array1, float[] array2) {
+    // Hash the shorter array so the lookup set stays as small as possible.
+    return array1.length <= array2.length ? overlapFloats(array1, array2) : overlapFloats(array2, array1);
+  }
+
+  /**
+   * Returns {@code true} if {@code array1} and {@code array2} share at least one value.
+   */
+  public static boolean arraysOverlap(double[] array1, double[] array2) {
+    // Hash the shorter array so the lookup set stays as small as possible.
+    return array1.length <= array2.length ? overlapDoubles(array1, array2) : overlapDoubles(array2, array1);
+  }
+
+  /**
+   * Returns {@code true} if {@code array1} and {@code array2} share at least one value.
+   */
+  public static boolean arraysOverlap(String[] array1, String[] array2) {
+    // Hash the shorter array so the lookup set stays as small as possible.
+    return array1.length <= array2.length ? overlapStrings(array1, array2) : overlapStrings(array2, array1);
+  }
+}
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
index 8e0311cd970..20dde176be7 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
@@ -557,6 +557,92 @@ public class ArrayTest extends CustomDataQueryClusterIntegrationTest {
     }
   }
 
+  /**
+   * Checks ARRAYS_OVERLAP on array literals (queries without a FROM clause) for INT, LONG, DOUBLE and STRING
+   * elements. Each pair holds a query whose two arrays share one element, followed by a query whose arrays share
+   * none; the first must evaluate to true and the second to false.
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testArraysOverlapWithLiterals(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+    String[][] overlappingThenDisjoint = {
+        // INT array literals
+        {
+            "SELECT ARRAYS_OVERLAP(ARRAY[1,2], ARRAY[3,2])",
+            "SELECT ARRAYS_OVERLAP(ARRAY[1,2], ARRAY[3,4])"
+        },
+        // LONG array literals (use large values to ensure LONG_ARRAY typing)
+        {
+            "SELECT ARRAYS_OVERLAP(ARRAY[2147483648,2147483649], ARRAY[2147483650,2147483649])",
+            "SELECT ARRAYS_OVERLAP(ARRAY[2147483648,2147483649], ARRAY[2147483650,2147483651])"
+        },
+        // DOUBLE array literals
+        {
+            "SELECT ARRAYS_OVERLAP(ARRAY[CAST(0.1 AS DOUBLE),CAST(0.2 AS DOUBLE)], "
+                + "ARRAY[CAST(0.3 AS DOUBLE),CAST(0.2 AS DOUBLE)])",
+            "SELECT ARRAYS_OVERLAP(ARRAY[CAST(0.1 AS DOUBLE),CAST(0.2 AS DOUBLE)], "
+                + "ARRAY[CAST(0.3 AS DOUBLE),CAST(0.4 AS DOUBLE)])"
+        },
+        // STRING array literals
+        {
+            "SELECT ARRAYS_OVERLAP(ARRAY['a','b'], ARRAY['x','b'])",
+            "SELECT ARRAYS_OVERLAP(ARRAY['a','b'], ARRAY['x','y'])"
+        }
+    };
+
+    for (String[] pair : overlappingThenDisjoint) {
+      JsonNode overlapping = postQuery(pair[0]).get("resultTable");
+      assertTrue(overlapping.get("rows").get(0).get(0).asBoolean());
+      JsonNode disjoint = postQuery(pair[1]).get("resultTable");
+      assertFalse(disjoint.get("rows").get(0).get(0).asBoolean());
+    }
+  }
+
+  /**
+   * Checks ARRAYS_OVERLAP as a WHERE filter between an array column and an array literal: a literal sharing a value
+   * with the column must select {@code getCountStarResult()} rows, a literal sharing none must select zero rows.
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testArraysOverlapWithColumns(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+    // LONG array column always contains [0,1,2,3] in this dataset
+    String longMatchQuery = "SELECT COUNT(*) FROM " + getTableName() + " WHERE ARRAYS_OVERLAP(" + LONG_ARRAY_COLUMN
+        + ", ARRAY[CAST(2 AS BIGINT), CAST(10 AS BIGINT)])";
+    JsonNode countCell = postQuery(longMatchQuery).get("resultTable").get("rows").get(0).get(0);
+    assertEquals(countCell.asLong(), getCountStarResult());
+
+    String longMissQuery = "SELECT COUNT(*) FROM " + getTableName() + " WHERE ARRAYS_OVERLAP(" + LONG_ARRAY_COLUMN
+        + ", ARRAY[CAST(10 AS BIGINT), CAST(11 AS BIGINT)])";
+    countCell = postQuery(longMissQuery).get("resultTable").get("rows").get(0).get(0);
+    assertEquals(countCell.asLong(), 0L);
+
+    // DOUBLE array column always contains [0.0,0.1,0.2,0.3]
+    String doubleMatchQuery = "SELECT COUNT(*) FROM " + getTableName() + " WHERE ARRAYS_OVERLAP("
+        + DOUBLE_ARRAY_COLUMN + ", ARRAY[CAST(0.2 AS DOUBLE), CAST(1.0 AS DOUBLE)])";
+    countCell = postQuery(doubleMatchQuery).get("resultTable").get("rows").get(0).get(0);
+    assertEquals(countCell.asLong(), getCountStarResult());
+
+    String doubleMissQuery = "SELECT COUNT(*) FROM " + getTableName() + " WHERE ARRAYS_OVERLAP("
+        + DOUBLE_ARRAY_COLUMN + ", ARRAY[CAST(9.9 AS DOUBLE), CAST(8.8 AS DOUBLE)])";
+    countCell = postQuery(doubleMissQuery).get("resultTable").get("rows").get(0).get(0);
+    assertEquals(countCell.asLong(), 0L);
+  }
+
+  /**
+   * Checks ARRAYS_OVERLAP with the same array column passed as both arguments, for the LONG and DOUBLE array
+   * columns: the filter is expected to select {@code getCountStarResult()} rows.
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testArraysOverlapWithSameColumn(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+    // Same query shape for both columns; LONG first, then DOUBLE.
+    for (String arrayColumn : new String[]{LONG_ARRAY_COLUMN, DOUBLE_ARRAY_COLUMN}) {
+      String selfOverlapQuery =
+          String.format("SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, %s)", getTableName(), arrayColumn,
+              arrayColumn);
+      JsonNode countCell = postQuery(selfOverlapQuery).get("resultTable").get("rows").get(0).get(0);
+      assertEquals(countCell.asLong(), getCountStarResult());
+    }
+  }
+
   @Test(dataProvider = "useBothQueryEngines")
   public void testStringArrayLiteral(boolean useMultiStageQueryEngine)
       throws Exception {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to