This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new cf6a9204f8b feat(common): add ARRAYHASANY scalar UDF for arrays
(int/long/float/double/string)

- New PinotScalarFunction: ArrayHasAnyScalarFunction (ARRAYHASANY)
- Type-dispatched implementation via FunctionInfo map
- Integration tests: ArrayTest adds literals, column filters, and same-column
  self-overlap checks
- Minor planner wiring update in PinotOperatorTable (#17156)
cf6a9204f8b is described below
commit cf6a9204f8bdb5ed941f35ad6d7dc78a7b27d57a
Author: Xiang Fu <[email protected]>
AuthorDate: Fri Nov 7 12:25:03 2025 -0800
feat(common): add ARRAYHASANY scalar UDF for arrays
(int/long/float/double/string)

- New PinotScalarFunction: ArrayHasAnyScalarFunction (ARRAYHASANY)
- Type-dispatched implementation via FunctionInfo map
- Integration tests: ArrayTest adds literals, column filters, and same-column
  self-overlap checks
- Minor planner wiring update in PinotOperatorTable (#17156)
---
.../scalar/array/ArraysOverlapScalarFunction.java | 181 +++++++++++++++++++++
.../pinot/integration/tests/custom/ArrayTest.java | 86 ++++++++++
2 files changed, 267 insertions(+)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java
new file mode 100644
index 00000000000..438cad4ae4e
--- /dev/null
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/array/ArraysOverlapScalarFunction.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar.array;
+
+import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
+import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
+import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+import org.apache.pinot.common.function.FunctionInfo;
+import org.apache.pinot.common.function.PinotScalarFunction;
+import org.apache.pinot.common.function.sql.PinotSqlFunction;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.spi.annotations.ScalarFunction;
+
+
+@ScalarFunction(names = {"ARRAYS_OVERLAP", "ARRAYSOVERLAP"})
+public class ArraysOverlapScalarFunction implements PinotScalarFunction {
+ // ARRAYS_OVERLAP (alias ARRAYSOVERLAP): true when two arrays of the same element type share at least one value.
+ private static final Map<DataSchema.ColumnDataType, FunctionInfo>
+ TYPE_FUNCTION_INFO_MAP = new EnumMap<>(DataSchema.ColumnDataType.class); // array column type -> arraysOverlap overload
+ // Register one arraysOverlap overload per supported array type (INT, LONG, FLOAT, DOUBLE, STRING).
+ static {
+ try {
+ TYPE_FUNCTION_INFO_MAP.put(DataSchema.ColumnDataType.INT_ARRAY,
+ new
FunctionInfo(ArraysOverlapScalarFunction.class.getMethod("arraysOverlap",
int[].class, int[].class),
+ ArraysOverlapScalarFunction.class, false));
+ TYPE_FUNCTION_INFO_MAP.put(DataSchema.ColumnDataType.LONG_ARRAY,
+ new
FunctionInfo(ArraysOverlapScalarFunction.class.getMethod("arraysOverlap",
long[].class, long[].class),
+ ArraysOverlapScalarFunction.class, false));
+ TYPE_FUNCTION_INFO_MAP.put(DataSchema.ColumnDataType.FLOAT_ARRAY,
+ new
FunctionInfo(ArraysOverlapScalarFunction.class.getMethod("arraysOverlap",
float[].class, float[].class),
+ ArraysOverlapScalarFunction.class, false));
+ TYPE_FUNCTION_INFO_MAP.put(DataSchema.ColumnDataType.DOUBLE_ARRAY,
+ new
FunctionInfo(ArraysOverlapScalarFunction.class.getMethod("arraysOverlap",
double[].class, double[].class),
+ ArraysOverlapScalarFunction.class, false));
+ TYPE_FUNCTION_INFO_MAP.put(DataSchema.ColumnDataType.STRING_ARRAY,
+ new
FunctionInfo(ArraysOverlapScalarFunction.class.getMethod("arraysOverlap",
String[].class, String[].class),
+ ArraysOverlapScalarFunction.class, false));
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException(e); // a looked-up overload is missing; abort static initialization
+ }
+ }
+ // Canonical function name.
+ @Override
+ public String getName() {
+ return "ARRAYS_OVERLAP";
+ }
+ // Every name this function answers to: the canonical name plus the alias without the underscore.
+ @Nullable
+ @Override
+ public Set<String> getNames() {
+ return Set.of("ARRAYS_OVERLAP", "ARRAYSOVERLAP");
+ }
+ // SQL-level signature: two ARRAY-family operands, BOOLEAN return type.
+ @Nullable
+ @Override
+ public PinotSqlFunction toPinotSqlFunction() {
+ return new PinotSqlFunction("ARRAYS_OVERLAP", ReturnTypes.BOOLEAN,
+ OperandTypes.family(List.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.ARRAY)));
+ }
+ // Picks the overload for the argument types; null unless there are exactly two arguments of the same registered array type.
+ @Nullable
+ @Override
+ public FunctionInfo getFunctionInfo(DataSchema.ColumnDataType[]
argumentTypes) {
+ if (argumentTypes.length != 2) {
+ return null;
+ }
+ if (argumentTypes[0] != argumentTypes[1]) {
+ return null;
+ }
+ return TYPE_FUNCTION_INFO_MAP.get(argumentTypes[0]);
+ }
+ // Used when only the argument count is known: two arguments resolve to the STRING_ARRAY overload, anything else to null.
+ @Nullable
+ @Override
+ public FunctionInfo getFunctionInfo(int numArguments) {
+ if (numArguments != 2) {
+ return null;
+ }
+ // Fall back to string
+ return getFunctionInfo(new DataSchema.ColumnDataType[]{
+ DataSchema.ColumnDataType.STRING_ARRAY,
+ DataSchema.ColumnDataType.STRING_ARRAY
+ });
+ }
+ // The overlap* helpers build a hash set from 'small' and return true on the first element of 'large' found in it.
+ private static boolean overlapInts(int[] small, int[] large) {
+ IntOpenHashSet set = new IntOpenHashSet(small);
+ for (int v : large) {
+ if (set.contains(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean overlapLongs(long[] small, long[] large) {
+ LongOpenHashSet set = new LongOpenHashSet(small);
+ for (long v : large) {
+ if (set.contains(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean overlapFloats(float[] small, float[] large) {
+ FloatOpenHashSet set = new FloatOpenHashSet(small);
+ for (float v : large) {
+ if (set.contains(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean overlapDoubles(double[] small, double[] large) {
+ DoubleOpenHashSet set = new DoubleOpenHashSet(small);
+
+ for (double v : large) {
+ if (set.contains(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean overlapStrings(String[] small, String[] large) {
+ ObjectOpenHashSet<String> set = new ObjectOpenHashSet<>(small);
+ for (String v : large) {
+ if (set.contains(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ // Public overloads: the shorter array (array1 on ties) is passed as 'small' so the hash set is built from it.
+ public static boolean arraysOverlap(int[] array1, int[] array2) {
+ return array1.length <= array2.length ? overlapInts(array1, array2) :
overlapInts(array2, array1);
+ }
+ // NOTE(review): none of the overloads null-checks its arguments - confirm callers never pass null arrays.
+ public static boolean arraysOverlap(long[] array1, long[] array2) {
+ return array1.length <= array2.length ? overlapLongs(array1, array2) :
overlapLongs(array2, array1);
+ }
+
+ public static boolean arraysOverlap(float[] array1, float[] array2) {
+ return array1.length <= array2.length ? overlapFloats(array1, array2) :
overlapFloats(array2, array1);
+ }
+
+ public static boolean arraysOverlap(double[] array1, double[] array2) {
+ return array1.length <= array2.length ? overlapDoubles(array1, array2) :
overlapDoubles(array2, array1);
+ }
+
+ public static boolean arraysOverlap(String[] array1, String[] array2) {
+ return array1.length <= array2.length ? overlapStrings(array1, array2) :
overlapStrings(array2, array1);
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
index 8e0311cd970..20dde176be7 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java
@@ -557,6 +557,92 @@ public class ArrayTest extends
CustomDataQueryClusterIntegrationTest {
}
}
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testArraysOverlapWithLiterals(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ // Per element type: first a query whose literal arrays share a value (expects true), then a disjoint pair (expects false). FLOAT literals are not exercised here.
+ // INT array literals
+ JsonNode result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY[1,2],
ARRAY[3,2])").get("resultTable");
+ assertTrue(result.get("rows").get(0).get(0).asBoolean());
+ result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY[1,2],
ARRAY[3,4])").get("resultTable");
+ assertFalse(result.get("rows").get(0).get(0).asBoolean());
+
+ // LONG array literals (use large values to ensure LONG_ARRAY typing)
+ result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY[2147483648,2147483649],
ARRAY[2147483650,2147483649])")
+ .get("resultTable");
+ assertTrue(result.get("rows").get(0).get(0).asBoolean());
+ result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY[2147483648,2147483649],
ARRAY[2147483650,2147483651])")
+ .get("resultTable");
+ assertFalse(result.get("rows").get(0).get(0).asBoolean());
+
+ // DOUBLE array literals
+ result = postQuery(
+ "SELECT ARRAYS_OVERLAP(ARRAY[CAST(0.1 AS DOUBLE),CAST(0.2 AS DOUBLE)],
ARRAY[CAST(0.3 AS DOUBLE),CAST(0.2 AS "
+ + "DOUBLE)])")
+ .get("resultTable");
+ assertTrue(result.get("rows").get(0).get(0).asBoolean());
+ result = postQuery(
+ "SELECT ARRAYS_OVERLAP(ARRAY[CAST(0.1 AS DOUBLE),CAST(0.2 AS DOUBLE)],
ARRAY[CAST(0.3 AS DOUBLE),CAST(0.4 AS "
+ + "DOUBLE)])")
+ .get("resultTable");
+ assertFalse(result.get("rows").get(0).get(0).asBoolean());
+
+ // STRING array literals
+ result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY['a','b'],
ARRAY['x','b'])").get("resultTable");
+ assertTrue(result.get("rows").get(0).get(0).asBoolean());
+ result = postQuery("SELECT ARRAYS_OVERLAP(ARRAY['a','b'],
ARRAY['x','y'])").get("resultTable");
+ assertFalse(result.get("rows").get(0).get(0).asBoolean());
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testArraysOverlapWithColumns(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ // Uses ARRAYS_OVERLAP(column, literal array) as a filter: an overlapping literal must match every row, a disjoint one must match none.
+ // LONG array column always contains [0,1,2,3] in this dataset
+ String queryTrue = String.format(
+ "SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, ARRAY[CAST(2 AS
BIGINT), CAST(10 AS BIGINT)])",
+ getTableName(), LONG_ARRAY_COLUMN);
+ JsonNode jsonNode = postQuery(queryTrue);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
getCountStarResult());
+ // Disjoint LONG literal: no row may match.
+ String queryFalse = String.format(
+ "SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, ARRAY[CAST(10 AS
BIGINT), CAST(11 AS BIGINT)])",
+ getTableName(), LONG_ARRAY_COLUMN);
+ jsonNode = postQuery(queryFalse);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
0L);
+
+ // DOUBLE array column always contains [0.0,0.1,0.2,0.3]
+ String queryDoubleTrue = String.format(
+ "SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, ARRAY[CAST(0.2 AS
DOUBLE), CAST(1.0 AS DOUBLE)])",
+ getTableName(), DOUBLE_ARRAY_COLUMN);
+ jsonNode = postQuery(queryDoubleTrue);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
getCountStarResult());
+ // Disjoint DOUBLE literal: no row may match.
+ String queryDoubleFalse = String.format(
+ "SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, ARRAY[CAST(9.9 AS
DOUBLE), CAST(8.8 AS DOUBLE)])",
+ getTableName(), DOUBLE_ARRAY_COLUMN);
+ jsonNode = postQuery(queryDoubleFalse);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
0L);
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testArraysOverlapWithSameColumn(boolean useMultiStageQueryEngine)
+ throws Exception { // A column compared with itself is expected to overlap on every row.
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String queryLong = String.format(
+ "SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, %s)",
getTableName(), LONG_ARRAY_COLUMN, LONG_ARRAY_COLUMN);
+ JsonNode jsonNode = postQuery(queryLong);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
getCountStarResult());
+ // Same self-overlap check for the DOUBLE column. NOTE(review): presumes no row holds an empty array - confirm against the test data.
+ String queryDouble =
+ String.format("SELECT COUNT(*) FROM %s WHERE ARRAYS_OVERLAP(%s, %s)",
getTableName(), DOUBLE_ARRAY_COLUMN,
+ DOUBLE_ARRAY_COLUMN);
+ jsonNode = postQuery(queryDouble);
+
assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(),
getCountStarResult());
+ }
+
@Test(dataProvider = "useBothQueryEngines")
public void testStringArrayLiteral(boolean useMultiStageQueryEngine)
throws Exception {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]