This is an automated email from the ASF dual-hosted git repository.

snuyanzin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/master by this push: new 030511111a3 [FLINK-35423][table] ARRAY_EXCEPT should follow set semantics 030511111a3 is described below commit 030511111a38a4e50e52c71a489bd4e3d021da3b Author: Sergey Nuyanzin <snuyan...@gmail.com> AuthorDate: Fri Jun 7 11:14:03 2024 +0200 [FLINK-35423][table] ARRAY_EXCEPT should follow set semantics --- docs/data/sql_functions.yml | 12 ++++++------ docs/data/sql_functions_zh.yml | 9 +++++++++ flink-python/pyflink/table/expression.py | 3 ++- .../table/planner/functions/CollectionFunctionsITCase.java | 12 ++++++------ .../table/runtime/functions/scalar/ArrayExceptFunction.java | 13 ++++++------- 5 files changed, 29 insertions(+), 20 deletions(-) diff --git a/docs/data/sql_functions.yml b/docs/data/sql_functions.yml index 6901891aeb8..5abafdba8ae 100644 --- a/docs/data/sql_functions.yml +++ b/docs/data/sql_functions.yml @@ -661,6 +661,12 @@ collection: - sql: ARRAY_CONCAT(array1, ...) table: array1.arrayConcat(...) description: Returns an array that is the result of concatenating at least one array. This array contains all the elements in the first array, followed by all the elements in the second array, and so forth, up to the Nth array. If any input array is NULL, the function returns NULL. + - sql: ARRAY_EXCEPT(array1, array2) + table: arrayOne.arrayExcept(arrayTwo) + description: Returns an ARRAY that contains the elements from array1 that are not in array2, without duplicates. If no elements remain after excluding the elements in array2 from array1, the function returns an empty ARRAY. If one or both arguments are NULL, the function returns NULL. The order of the elements from array1 is kept. + - sql: ARRAY_INTERSECT(array1, array2) + table: array1.arrayIntersect(array2) + description: Returns an ARRAY that contains the elements from array1 that are also in array2, without duplicates. If no elements that are both in array1 and array2, the function returns an empty ARRAY. 
If any of the array is null, the function will return null. The order of the elements from array1 is kept. - sql: ARRAY_MAX(array) table: array.arrayMax() description: Returns the maximum value from the array, if array itself is null, the function returns null. @@ -685,12 +691,6 @@ collection: - sql: MAP_FROM_ARRAYS(array_of_keys, array_of_values) table: mapFromArrays(array_of_keys, array_of_values) description: Returns a map created from an arrays of keys and values. Note that the lengths of two arrays should be the same. - - sql: ARRAY_EXCEPT(array1, array2) - table: arrayOne.arrayExcept(arrayTwo) - description: Returns an ARRAY that contains the elements from array1 that are not in array2. If no elements remain after excluding the elements in array2 from array1, the function returns an empty ARRAY. If one or both arguments are NULL, the function returns NULL. The order of the elements from array1 is kept. - - sql: ARRAY_INTERSECT(array1, array2) - table: array1.arrayIntersect(array2) - description: Returns an ARRAY that contains the elements from array1 that are also in array2, without duplicates. If no elements that are both in array1 and array2, the function returns an empty ARRAY. If any of the array is null, the function will return null. The order of the elements from array1 is kept. - sql: SPLIT(string, delimiter) table: string.split(delimiter) description: Returns an array of substrings by splitting the input string based on the given delimiter. If the delimiter is not found in the string, the original string is returned as the only element in the array. If the delimiter is empty, every character in the string is split. If the string or delimiter is null, a null value is returned. If the delimiter is found at the beginning or end of the string, or there are contiguous delimiters, then an empty string is added to the array. 
diff --git a/docs/data/sql_functions_zh.yml b/docs/data/sql_functions_zh.yml index fbb3e5d8415..584578c77f3 100644 --- a/docs/data/sql_functions_zh.yml +++ b/docs/data/sql_functions_zh.yml @@ -796,6 +796,12 @@ collection: - sql: ARRAY_CONCAT(array1, ...) table: array1.arrayConcat(...) description: 返回一个数组,该数组是连接至少一个数组的结果。该数组包含第一个数组中的所有元素,然后是第二个数组中的所有元素,依此类推,直到第 N 个数组。如果任何输入数组为 NULL,则函数返回 NULL。 + - sql: ARRAY_EXCEPT(array1, array2) + table: arrayOne.arrayExcept(arrayTwo) + description: Returns an ARRAY that contains the elements from array1 that are not in array2, without duplicates. If no elements remain after excluding the elements in array2 from array1, the function returns an empty ARRAY. If one or both arguments are NULL, the function returns NULL. The order of the elements from array1 is kept. + - sql: ARRAY_INTERSECT(array1, array2) + table: array1.arrayIntersect(array2) + description: Returns an ARRAY that contains the elements from array1 that are also in array2, without duplicates. If no elements that are both in array1 and array2, the function returns an empty ARRAY. If any of the array is null, the function will return null. The order of the elements from array1 is kept. - sql: ARRAY_MAX(array) table: array.arrayMax() description: 返回数组中的最大值,如果数组是 null,则返回 null。 @@ -817,6 +823,9 @@ collection: - sql: MAP_UNION(map1, map2) table: map1.mapUnion(map2) description: 返回一个通过合并两个图 'map1' 和 'map2' 创建的图。这两个图应该具有共同的图类型。如果有重叠的键,'map2' 的值将覆盖 'map1' 的值。如果任一图为空,则返回 null。 + - sql: SPLIT(string, delimiter) + table: string.split(delimiter) + description: Returns an array of substrings by splitting the input string based on the given delimiter. If the delimiter is not found in the string, the original string is returned as the only element in the array. If the delimiter is empty, every character in the string is split. If the string or delimiter is null, a null value is returned. 
If the delimiter is found at the beginning or end of the string, or there are contiguous delimiters, then an empty string is added to the array. json: - sql: IS JSON [ { VALUE | SCALAR | ARRAY | OBJECT } ] diff --git a/flink-python/pyflink/table/expression.py b/flink-python/pyflink/table/expression.py index 9bfcb7746bb..d83398e60b7 100644 --- a/flink-python/pyflink/table/expression.py +++ b/flink-python/pyflink/table/expression.py @@ -1614,7 +1614,8 @@ class Expression(Generic[T]): Returns an ARRAY that contains the elements from array1 that are not in array2. If no elements remain after excluding the elements in array2 from array1, the function returns an empty ARRAY. If one or both arguments are NULL, - the function returns NULL. The order of the elements from array1 is kept. + the function returns NULL. The order of the elements from array1 is kept + however the duplicates are removed. """ return _binary_op("arrayExcept")(self, array) diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java index 4f6cb758914..230bf290c0a 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java @@ -1634,22 +1634,22 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase { .testResult( $("f0").arrayExcept(new Integer[] {1, null, 4}), "ARRAY_EXCEPT(f0, ARRAY[1, NULL, 4])", - new Integer[] {2, 2}, + new Integer[] {2}, DataTypes.ARRAY(DataTypes.INT()).nullable()) .testResult( $("f0").arrayExcept(new Integer[] {1}), "ARRAY_EXCEPT(f0, ARRAY[1])", - new Integer[] {2, 2}, + new Integer[] {2}, DataTypes.ARRAY(DataTypes.INT()).nullable()) .testResult( $("f0").arrayExcept(new Integer[] {42}), 
"ARRAY_EXCEPT(f0, ARRAY[42])", - new Integer[] {1, 2, 2}, + new Integer[] {1, 2}, DataTypes.ARRAY(DataTypes.INT()).nullable()) .testResult( $("f6").arrayExcept(new Integer[] {2, 2}), "ARRAY_EXCEPT(f6, ARRAY[2, 2])", - new Integer[] {1, 3, 4, 2}, + new Integer[] {1, 3, 4}, DataTypes.ARRAY(DataTypes.INT()).nullable()) // arrayTwo is NULL .testResult( @@ -1663,7 +1663,7 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase { .testResult( $("f0").arrayExcept(new Integer[] {null, 2}), "ARRAY_EXCEPT(f0, ARRAY[null, 2])", - new Integer[] {1, 2}, + new Integer[] {1}, DataTypes.ARRAY(DataTypes.INT()).nullable()) // arrayOne is NULL .testResult( @@ -1675,7 +1675,7 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase { .testResult( $("f3").arrayExcept(new Integer[] {null, 42}), "ARRAY_EXCEPT(f3, ARRAY[null, 42])", - new Integer[] {null, 1}, + new Integer[] {1}, DataTypes.ARRAY(DataTypes.INT()).nullable()) // ARRAY<ROW<BOOLEAN, DATE>> .testResult( diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java index 98125730214..68ffb5dbe4e 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java @@ -33,9 +33,9 @@ import org.apache.flink.util.FlinkRuntimeException; import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Set; /** Implementation of {@link BuiltInFunctionDefinitions#ARRAY_EXCEPT}. 
*/
 @Internal
@@ -65,19 +65,18 @@ public class ArrayExceptFunction extends BuiltInScalarFunction {
         }
 
         List<Object> list = new ArrayList<>();
-        Map<ObjectContainer, Integer> map = new HashMap<>();
+        Set<ObjectContainer> set = new HashSet<>();
         for (int pos = 0; pos < arrayTwo.size(); pos++) {
             final Object element = elementGetter.getElementOrNull(arrayTwo, pos);
             final ObjectContainer objectContainer = createObjectContainer(element);
-            map.merge(objectContainer, 1, (k, v) -> v + 1);
+            set.add(objectContainer);
         }
         for (int pos = 0; pos < arrayOne.size(); pos++) {
             final Object element = elementGetter.getElementOrNull(arrayOne, pos);
             final ObjectContainer objectContainer = createObjectContainer(element);
-            if (map.containsKey(objectContainer)) {
-                map.compute(objectContainer, (k, v) -> v == null || v == 1 ? null : v - 1);
-            } else {
+            if (!set.contains(objectContainer)) {
                 list.add(element);
+                set.add(objectContainer);
             }
         }
         return new GenericArrayData(list.toArray());