This is an automated email from the ASF dual-hosted git repository.
jiajunxie pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 821f03be6a [CALCITE-5704] Add ARRAY_EXCEPT, ARRAY_INTERSECT,
ARRAY_UNION function (enabled in Spark library)
821f03be6a is described below
commit 821f03be6a99d44d2e9f9380354f28e884430602
Author: yongen.ly <[email protected]>
AuthorDate: Mon May 15 12:52:09 2023 +0800
[CALCITE-5704] Add ARRAY_EXCEPT, ARRAY_INTERSECT, ARRAY_UNION function
(enabled in Spark library)
---
.../calcite/adapter/enumerable/RexImpTable.java | 9 ++-
.../org/apache/calcite/runtime/SqlFunctions.java | 22 ++++++
.../main/java/org/apache/calcite/sql/SqlKind.java | 12 ++++
.../calcite/sql/fun/SqlLibraryOperators.java | 51 ++++++++++----
.../org/apache/calcite/util/BuiltInMethod.java | 3 +
site/_docs/reference.md | 5 +-
.../org/apache/calcite/test/SqlOperatorTest.java | 82 +++++++++++++++++++---
7 files changed, 160 insertions(+), 24 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index fb764466ab..f55ed21542 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -122,12 +122,15 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_COMPACT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_CONCAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_CONCAT_AGG;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_DISTINCT;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_EXCEPT;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_INTERSECT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_LENGTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_MAX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_MIN;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_REPEAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_REVERSE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_SIZE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_UNION;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ASINH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ATANH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.BOOL_AND;
@@ -686,19 +689,21 @@ public class RexImpTable {
// Multisets & arrays
defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
NullPolicy.STRICT);
- defineMethod(ARRAY_LENGTH, BuiltInMethod.COLLECTION_SIZE.method,
- NullPolicy.STRICT);
defineMethod(SLICE, BuiltInMethod.SLICE.method, NullPolicy.NONE);
defineMethod(ELEMENT, BuiltInMethod.ELEMENT.method, NullPolicy.STRICT);
defineMethod(STRUCT_ACCESS, BuiltInMethod.STRUCT_ACCESS.method,
NullPolicy.ANY);
defineMethod(MEMBER_OF, BuiltInMethod.MEMBER_OF.method, NullPolicy.NONE);
defineMethod(ARRAY_COMPACT, BuiltInMethod.ARRAY_COMPACT.method,
NullPolicy.STRICT);
defineMethod(ARRAY_DISTINCT, BuiltInMethod.ARRAY_DISTINCT.method,
NullPolicy.STRICT);
+ defineMethod(ARRAY_EXCEPT, BuiltInMethod.ARRAY_EXCEPT.method,
NullPolicy.ANY);
+ defineMethod(ARRAY_INTERSECT, BuiltInMethod.ARRAY_INTERSECT.method,
NullPolicy.ANY);
+ defineMethod(ARRAY_LENGTH, BuiltInMethod.COLLECTION_SIZE.method,
NullPolicy.STRICT);
defineMethod(ARRAY_MAX, BuiltInMethod.ARRAY_MAX.method,
NullPolicy.STRICT);
defineMethod(ARRAY_MIN, BuiltInMethod.ARRAY_MIN.method,
NullPolicy.STRICT);
defineMethod(ARRAY_REPEAT, BuiltInMethod.ARRAY_REPEAT.method,
NullPolicy.NONE);
defineMethod(ARRAY_REVERSE, BuiltInMethod.ARRAY_REVERSE.method,
NullPolicy.STRICT);
defineMethod(ARRAY_SIZE, BuiltInMethod.COLLECTION_SIZE.method,
NullPolicy.STRICT);
+ defineMethod(ARRAY_UNION, BuiltInMethod.ARRAY_UNION.method,
NullPolicy.ANY);
defineMethod(MAP_ENTRIES, BuiltInMethod.MAP_ENTRIES.method,
NullPolicy.STRICT);
defineMethod(MAP_KEYS, BuiltInMethod.MAP_KEYS.method, NullPolicy.STRICT);
defineMethod(MAP_VALUES, BuiltInMethod.MAP_VALUES.method,
NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index a3b92ee6fb..e1835b49d0 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -3926,6 +3926,28 @@ public class SqlFunctions {
return Collections.nCopies(numberOfElement, element);
}
+ /** Support the ARRAY_EXCEPT function. */
+ public static List arrayExcept(List list1, List list2) {
+ final Set result = new LinkedHashSet<>(list1);
+ result.removeAll(list2);
+ return new ArrayList<>(result);
+ }
+
+ /** Support the ARRAY_INTERSECT function. */
+ public static List arrayIntersect(List list1, List list2) {
+ final Set result = new LinkedHashSet<>(list1);
+ result.retainAll(list2);
+ return new ArrayList<>(result);
+ }
+
+ /** Support the ARRAY_UNION function. */
+ public static List arrayUnion(List list1, List list2) {
+ final Set result = new LinkedHashSet<>();
+ result.addAll(list1);
+ result.addAll(list2);
+ return new ArrayList<>(result);
+ }
+
/** Support the SORT_ARRAY function. */
public static List sortArray(List list, boolean ascending) {
Comparator comparator = ascending
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index 86f85ba4ba..a53936c201 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -686,6 +686,15 @@ public enum SqlKind {
/** {@code ARRAY_DISTINCT} function (Spark semantics). */
ARRAY_DISTINCT,
+ /** {@code ARRAY_EXCEPT} function (Spark semantics). */
+ ARRAY_EXCEPT,
+
+ /** {@code ARRAY_INTERSECT} function (Spark semantics). */
+ ARRAY_INTERSECT,
+
+ /** {@code ARRAY_LENGTH} function (Spark semantics). */
+ ARRAY_LENGTH,
+
/** {@code ARRAY_MAX} function (Spark semantics). */
ARRAY_MAX,
@@ -701,6 +710,9 @@ public enum SqlKind {
/** {@code ARRAY_SIZE} function (Spark semantics). */
ARRAY_SIZE,
+ /** {@code ARRAY_UNION} function (Spark semantics). */
+ ARRAY_UNION,
+
/** {@code SORT_ARRAY} function (Spark semantics). */
SORT_ARRAY,
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 05021ed3b9..1a09e98e23 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -872,6 +872,20 @@ public abstract class SqlLibraryOperators {
SqlLibraryOperators::arrayReturnType,
OperandTypes.SAME_VARIADIC);
+ /** The "ARRAY_COMPACT(array)" function. */
+ @LibraryOperator(libraries = {SPARK})
+ public static final SqlFunction ARRAY_COMPACT =
+ SqlBasicFunction.create(SqlKind.ARRAY_COMPACT,
+ ReturnTypes.ARG0_NULLABLE,
+ OperandTypes.ARRAY);
+
+ /** The "ARRAY_CONCAT(array [, array]*)" function. */
+ @LibraryOperator(libraries = {BIG_QUERY})
+ public static final SqlFunction ARRAY_CONCAT =
+ SqlBasicFunction.create(SqlKind.ARRAY_CONCAT,
+ ReturnTypes.LEAST_RESTRICTIVE,
+ OperandTypes.AT_LEAST_ONE_SAME_VARIADIC);
+
/** The "ARRAY_DISTINCT(array)" function (Spark). */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction ARRAY_DISTINCT =
@@ -879,10 +893,28 @@ public abstract class SqlLibraryOperators {
ReturnTypes.ARG0_NULLABLE,
OperandTypes.ARRAY);
+ /** The "ARRAY_EXCEPT(array1, array2)" function. */
+ @LibraryOperator(libraries = {SPARK})
+ public static final SqlFunction ARRAY_EXCEPT =
+ SqlBasicFunction.create(SqlKind.ARRAY_EXCEPT,
+ ReturnTypes.LEAST_RESTRICTIVE,
+ OperandTypes.and(
+ OperandTypes.SAME_SAME,
+ OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)));
+
+ /** The "ARRAY_INTERSECT(array1, array2)" function. */
+ @LibraryOperator(libraries = {SPARK})
+ public static final SqlFunction ARRAY_INTERSECT =
+ SqlBasicFunction.create(SqlKind.ARRAY_INTERSECT,
+ ReturnTypes.LEAST_RESTRICTIVE,
+ OperandTypes.and(
+ OperandTypes.SAME_SAME,
+ OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)));
+
/** The "ARRAY_LENGTH(array)" function. */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction ARRAY_LENGTH =
- SqlBasicFunction.create("ARRAY_LENGTH",
+ SqlBasicFunction.create(SqlKind.ARRAY_LENGTH,
ReturnTypes.INTEGER_NULLABLE,
OperandTypes.ARRAY);
@@ -923,19 +955,14 @@ public abstract class SqlLibraryOperators {
ReturnTypes.INTEGER_NULLABLE,
OperandTypes.ARRAY);
- /** The "ARRAY_COMPACT(array)" function. */
+ /** The "ARRAY_UNION(array1, array2)" function. */
@LibraryOperator(libraries = {SPARK})
- public static final SqlFunction ARRAY_COMPACT =
- SqlBasicFunction.create(SqlKind.ARRAY_COMPACT,
- ReturnTypes.ARG0_NULLABLE,
- OperandTypes.ARRAY);
-
- /** The "ARRAY_CONCAT(array [, array]*)" function. */
- @LibraryOperator(libraries = {BIG_QUERY})
- public static final SqlFunction ARRAY_CONCAT =
- SqlBasicFunction.create(SqlKind.ARRAY_CONCAT,
+ public static final SqlFunction ARRAY_UNION =
+ SqlBasicFunction.create(SqlKind.ARRAY_UNION,
ReturnTypes.LEAST_RESTRICTIVE,
- OperandTypes.AT_LEAST_ONE_SAME_VARIADIC);
+ OperandTypes.and(
+ OperandTypes.SAME_SAME,
+ OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY)));
/** The "SORT_ARRAY(array)" function (Spark). */
@LibraryOperator(libraries = {SPARK})
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index ad1857bfc1..f9e7102def 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -633,6 +633,9 @@ public enum BuiltInMethod {
ARRAY_MAX(SqlFunctions.class, "arrayMax", List.class),
ARRAY_MIN(SqlFunctions.class, "arrayMin", List.class),
ARRAY_REPEAT(SqlFunctions.class, "repeat", Object.class, Integer.class),
+ ARRAY_EXCEPT(SqlFunctions.class, "arrayExcept", List.class, List.class),
+ ARRAY_INTERSECT(SqlFunctions.class, "arrayIntersect", List.class,
List.class),
+ ARRAY_UNION(SqlFunctions.class, "arrayUnion", List.class, List.class),
ARRAY_REVERSE(SqlFunctions.class, "reverse", List.class),
SORT_ARRAY(SqlFunctions.class, "sortArray", List.class, boolean.class),
MAP_ENTRIES(SqlFunctions.class, "mapEntries", Map.class),
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index d9b486ba80..62c04898a2 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2654,13 +2654,16 @@ BigQuery's type system uses confusingly different names
for types and functions:
| s | ARRAY(expr [, expr ]*) | Construct an array in
Apache Spark
| s | ARRAY_COMPACT(array) | Removes null values
from the *array*
| b | ARRAY_CONCAT(array [, array ]*) | Concatenates one or
more arrays. If any input argument is `NULL` the function returns `NULL`
-| s | ARRAY_DISTINCT(array) | Returns unique elements
of *array*. Keeps ordering of elements.
+| s | ARRAY_DISTINCT(array) | Removes duplicate
values from the *array* that keeps ordering of elements
+| s | ARRAY_EXCEPT(array1, array2) | Returns an array of the
elements in *array1* but not in *array2*, without duplicates
+| s | ARRAY_INTERSECT(array1, array2) | Returns an array of the
elements in the intersection of *array1* and *array2*, without duplicates
| b | ARRAY_LENGTH(array) | Synonym for
`CARDINALITY`
| s | ARRAY_MAX(array) | Returns the maximum
value in the *array*
| s | ARRAY_MIN(array) | Returns the minimum
value in the *array*
| s | ARRAY_REPEAT(element, count) | Returns the array
containing element count times.
| b | ARRAY_REVERSE(array) | Reverses elements of
*array*
| s | ARRAY_SIZE(array) | Synonym for
`CARDINALITY`
+| s | ARRAY_UNION(array1, array2) | Returns an array of the
elements in the union of *array1* and *array2*, without duplicates
| s | SORT_ARRAY(array [, ascendingOrder]) | Sorts the *array* in
ascending or descending order according to the natural ordering of the array
elements. The default order is ascending if *ascendingOrder* is not specified.
Null elements will be placed at the beginning of the returned array in
ascending order or at the end of the returned array in descending order
| * | ASINH(numeric) | Returns the inverse
hyperbolic sine of *numeric*
| * | ATANH(numeric) | Returns the inverse
hyperbolic tangent of *numeric*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index b4181e0288..500a501573 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -5366,9 +5366,11 @@ public class SqlOperatorTest {
/** Tests {@code ARRAY_DISTINCT} function from Spark. */
@Test void testArrayDistinctFunc() {
- SqlOperatorFixture f = fixture()
- .setFor(SqlLibraryOperators.ARRAY_DISTINCT)
- .withLibrary(SqlLibrary.SPARK);
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_DISTINCT);
+ f0.checkFails("^array_distinct(array['foo'])^",
+ "No match found for function signature ARRAY_DISTINCT\\(<CHAR\\(3\\)
ARRAY>\\)", false);
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK);
f.checkScalar("array_distinct(array[1, 2, 2, 1])", "[1, 2]",
"INTEGER NOT NULL ARRAY NOT NULL");
f.checkScalar("array_distinct(array[null, 1, null])", "[null, 1]",
@@ -5430,9 +5432,11 @@ public class SqlOperatorTest {
/** Tests {@code ARRAY_REVERSE} function from BigQuery. */
@Test void testArrayReverseFunc() {
- SqlOperatorFixture f = fixture()
- .setFor(SqlLibraryOperators.ARRAY_REVERSE)
- .withLibrary(SqlLibrary.BIG_QUERY);
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_REVERSE);
+ f0.checkFails("^array_reverse(array[1])^",
+ "No match found for function signature ARRAY_REVERSE\\(<INTEGER
ARRAY>\\)", false);
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY);
f.checkScalar("array_reverse(array[1])", "[1]",
"INTEGER NOT NULL ARRAY NOT NULL");
f.checkScalar("array_reverse(array[1, 2])", "[2, 1]",
@@ -5458,9 +5462,11 @@ public class SqlOperatorTest {
/** Tests {@code ARRAY_LENGTH} function from BigQuery. */
@Test void testArrayLengthFunc() {
- SqlOperatorFixture f = fixture()
- .setFor(SqlLibraryOperators.ARRAY_LENGTH)
- .withLibrary(SqlLibrary.BIG_QUERY);
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_LENGTH);
+ f0.checkFails("^array_length(array[1])^",
+ "No match found for function signature ARRAY_LENGTH\\(<INTEGER
ARRAY>\\)", false);
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY);
f.checkScalar("array_length(array[1])", "1",
"INTEGER NOT NULL");
f.checkScalar("array_length(array[1, 2, null])", "3",
@@ -5468,6 +5474,64 @@ public class SqlOperatorTest {
f.checkNull("array_length(null)");
}
+ /** Tests {@code ARRAY_EXCEPT} function from Spark. */
+ @Test void testArrayExceptFunc() {
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_EXCEPT);
+ f0.checkFails("^array_except(array[2, null, 3, 3], array[1, 2, null])^",
+ "No match found for function signature "
+ + "ARRAY_EXCEPT\\(<INTEGER ARRAY>, <INTEGER ARRAY>\\)", false);
+
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK);
+ f.checkScalar("array_except(array[2, 3, 3], array[2])",
+ "[3]", "INTEGER NOT NULL ARRAY NOT NULL");
+ f.checkScalar("array_except(array[2], array[2, 3])",
+ "[]", "INTEGER NOT NULL ARRAY NOT NULL");
+ f.checkScalar("array_except(array[2, null, 3, 3], array[1, 2, null])",
+ "[3]", "INTEGER ARRAY NOT NULL");
+ f.checkNull("array_except(cast(null as integer array), array[1])");
+ f.checkNull("array_except(array[1], cast(null as integer array))");
+ f.checkNull("array_except(cast(null as integer array), cast(null as
integer array))");
+ }
+
+ /** Tests {@code ARRAY_INTERSECT} function from Spark. */
+ @Test void testArrayIntersectFunc() {
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_INTERSECT);
+ f0.checkFails("^array_intersect(array[2, null, 2], array[1, 2, null])^",
+ "No match found for function signature "
+ + "ARRAY_INTERSECT\\(<INTEGER ARRAY>, <INTEGER ARRAY>\\)", false);
+
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK);
+ f.checkScalar("array_intersect(array[2, 3, 3], array[3])",
+ "[3]", "INTEGER NOT NULL ARRAY NOT NULL");
+ f.checkScalar("array_intersect(array[1], array[2, 3])",
+ "[]", "INTEGER NOT NULL ARRAY NOT NULL");
+ f.checkScalar("array_intersect(array[2, null, 2], array[1, 2, null])",
+ "[2, null]", "INTEGER ARRAY NOT NULL");
+ f.checkNull("array_intersect(cast(null as integer array), array[1])");
+ f.checkNull("array_intersect(array[1], cast(null as integer array))");
+ f.checkNull("array_intersect(cast(null as integer array), cast(null as
integer array))");
+ }
+
+ /** Tests {@code ARRAY_UNION} function from Spark. */
+ @Test void testArrayUnionFunc() {
+ final SqlOperatorFixture f0 = fixture();
+ f0.setFor(SqlLibraryOperators.ARRAY_UNION);
+ f0.checkFails("^array_union(array[2, null, 2], array[1, 2, null])^",
+ "No match found for function signature "
+ + "ARRAY_UNION\\(<INTEGER ARRAY>, <INTEGER ARRAY>\\)", false);
+
+ final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK);
+ f.checkScalar("array_intersect(array[2, 3, 3], array[3])",
+ "[3]", "INTEGER NOT NULL ARRAY NOT NULL");
+ f.checkScalar("array_union(array[2, null, 2], array[1, 2, null])",
+ "[2, null, 1]", "INTEGER ARRAY NOT NULL");
+ f.checkNull("array_union(cast(null as integer array), array[1])");
+ f.checkNull("array_union(array[1], cast(null as integer array))");
+ f.checkNull("array_union(cast(null as integer array), cast(null as integer
array))");
+ }
+
/** Tests {@code SORT_ARRAY} function from Spark. */
@Test void testSortArrayFunc() {
final SqlOperatorFixture f0 = fixture();