This is an automated email from the ASF dual-hosted git repository.

snuyanzin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
     new 030511111a3 [FLINK-35423][table] ARRAY_EXCEPT should follow set semantics
030511111a3 is described below

commit 030511111a38a4e50e52c71a489bd4e3d021da3b
Author: Sergey Nuyanzin <snuyan...@gmail.com>
AuthorDate: Fri Jun 7 11:14:03 2024 +0200

    [FLINK-35423][table] ARRAY_EXCEPT should follow set semantics
---
 docs/data/sql_functions.yml                                 | 12 ++++++------
 docs/data/sql_functions_zh.yml                              |  9 +++++++++
 flink-python/pyflink/table/expression.py                    |  3 ++-
 .../table/planner/functions/CollectionFunctionsITCase.java  | 12 ++++++------
 .../table/runtime/functions/scalar/ArrayExceptFunction.java | 13 ++++++-------
 5 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/docs/data/sql_functions.yml b/docs/data/sql_functions.yml
index 6901891aeb8..5abafdba8ae 100644
--- a/docs/data/sql_functions.yml
+++ b/docs/data/sql_functions.yml
@@ -661,6 +661,12 @@ collection:
   - sql: ARRAY_CONCAT(array1, ...)
     table: array1.arrayConcat(...)
     description: Returns an array that is the result of concatenating at least 
one array. This array contains all the elements in the first array, followed by 
all the elements in the second array, and so forth, up to the Nth array. If any 
input array is NULL, the function returns NULL.
+  - sql: ARRAY_EXCEPT(array1, array2)
+    table: arrayOne.arrayExcept(arrayTwo)
+    description: Returns an ARRAY that contains the elements from array1 that 
are not in array2, without duplicates. If no elements remain after excluding 
the elements in array2 from array1, the function returns an empty ARRAY. If one 
or both arguments are NULL, the function returns NULL. The order of the 
elements from array1 is kept.
+  - sql: ARRAY_INTERSECT(array1, array2)
+    table: array1.arrayIntersect(array2)
+    description: Returns an ARRAY that contains the elements from array1 that 
are also in array2, without duplicates. If no elements that are both in array1 
and array2, the function returns an empty ARRAY. If any of the array is null, 
the function will return null. The order of the elements from array1 is kept.
   - sql: ARRAY_MAX(array)
     table: array.arrayMax()
     description: Returns the maximum value from the array, if array itself is 
null, the function returns null.
@@ -685,12 +691,6 @@ collection:
   - sql: MAP_FROM_ARRAYS(array_of_keys, array_of_values)
     table: mapFromArrays(array_of_keys, array_of_values)
     description: Returns a map created from an arrays of keys and values. Note 
that the lengths of two arrays should be the same.
-  - sql: ARRAY_EXCEPT(array1, array2)
-    table: arrayOne.arrayExcept(arrayTwo)
-    description: Returns an ARRAY that contains the elements from array1 that 
are not in array2. If no elements remain after excluding the elements in array2 
from array1, the function returns an empty ARRAY. If one or both arguments are 
NULL, the function returns NULL. The order of the elements from array1 is kept.
-  - sql: ARRAY_INTERSECT(array1, array2)
-    table: array1.arrayIntersect(array2)
-    description: Returns an ARRAY that contains the elements from array1 that 
are also in array2, without duplicates. If no elements that are both in array1 
and array2, the function returns an empty ARRAY. If any of the array is null, 
the function will return null. The order of the elements from array1 is kept.
   - sql: SPLIT(string, delimiter)
     table: string.split(delimiter)
     description: Returns an array of substrings by splitting the input string 
based on the given delimiter. If the delimiter is not found in the string, the 
original string is returned as the only element in the array. If the delimiter 
is empty, every character in the string is split. If the string or delimiter is 
null, a null value is returned. If the delimiter is found at the beginning or 
end of the string, or there are contiguous delimiters, then an empty string is 
added to the array.
diff --git a/docs/data/sql_functions_zh.yml b/docs/data/sql_functions_zh.yml
index fbb3e5d8415..584578c77f3 100644
--- a/docs/data/sql_functions_zh.yml
+++ b/docs/data/sql_functions_zh.yml
@@ -796,6 +796,12 @@ collection:
   - sql: ARRAY_CONCAT(array1, ...)
     table: array1.arrayConcat(...)
     description: 
返回一个数组,该数组是连接至少一个数组的结果。该数组包含第一个数组中的所有元素,然后是第二个数组中的所有元素,依此类推,直到第 N 个数组。如果任何输入数组为 
NULL,则函数返回 NULL。
+  - sql: ARRAY_EXCEPT(array1, array2)
+    table: arrayOne.arrayExcept(arrayTwo)
+    description: Returns an ARRAY that contains the elements from array1 that 
are not in array2, without duplicates. If no elements remain after excluding 
the elements in array2 from array1, the function returns an empty ARRAY. If one 
or both arguments are NULL, the function returns NULL. The order of the 
elements from array1 is kept.
+  - sql: ARRAY_INTERSECT(array1, array2)
+    table: array1.arrayIntersect(array2)
+    description: Returns an ARRAY that contains the elements from array1 that 
are also in array2, without duplicates. If no elements that are both in array1 
and array2, the function returns an empty ARRAY. If any of the array is null, 
the function will return null. The order of the elements from array1 is kept.
   - sql: ARRAY_MAX(array)
     table: array.arrayMax()
     description: 返回数组中的最大值,如果数组是 null,则返回 null。
@@ -817,6 +823,9 @@ collection:
   - sql: MAP_UNION(map1, map2)
     table: map1.mapUnion(map2)
     description: 返回一个通过合并两个图 'map1' 和 'map2' 
创建的图。这两个图应该具有共同的图类型。如果有重叠的键,'map2' 的值将覆盖 'map1' 的值。如果任一图为空,则返回 null。
+  - sql: SPLIT(string, delimiter)
+    table: string.split(delimiter)
+    description: Returns an array of substrings by splitting the input string 
based on the given delimiter. If the delimiter is not found in the string, the 
original string is returned as the only element in the array. If the delimiter 
is empty, every character in the string is split. If the string or delimiter is 
null, a null value is returned. If the delimiter is found at the beginning or 
end of the string, or there are contiguous delimiters, then an empty string is 
added to the array.
 
 json:
   - sql: IS JSON [ { VALUE | SCALAR | ARRAY | OBJECT } ]
diff --git a/flink-python/pyflink/table/expression.py b/flink-python/pyflink/table/expression.py
index 9bfcb7746bb..d83398e60b7 100644
--- a/flink-python/pyflink/table/expression.py
+++ b/flink-python/pyflink/table/expression.py
@@ -1614,7 +1614,8 @@ class Expression(Generic[T]):
         Returns an ARRAY that contains the elements from array1 that are not 
in array2.
         If no elements remain after excluding the elements in array2 from 
array1,
         the function returns an empty ARRAY. If one or both arguments are NULL,
-        the function returns NULL. The order of the elements from array1 is 
kept.
+        the function returns NULL. The order of the elements from array1 is kept;
+        however, the duplicates are removed.
         """
         return _binary_op("arrayExcept")(self, array)
 
diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java
index 4f6cb758914..230bf290c0a 100644
--- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java
+++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java
@@ -1634,22 +1634,22 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
                         .testResult(
                                 $("f0").arrayExcept(new Integer[] {1, null, 
4}),
                                 "ARRAY_EXCEPT(f0, ARRAY[1, NULL, 4])",
-                                new Integer[] {2, 2},
+                                new Integer[] {2},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         .testResult(
                                 $("f0").arrayExcept(new Integer[] {1}),
                                 "ARRAY_EXCEPT(f0, ARRAY[1])",
-                                new Integer[] {2, 2},
+                                new Integer[] {2},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         .testResult(
                                 $("f0").arrayExcept(new Integer[] {42}),
                                 "ARRAY_EXCEPT(f0, ARRAY[42])",
-                                new Integer[] {1, 2, 2},
+                                new Integer[] {1, 2},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         .testResult(
                                 $("f6").arrayExcept(new Integer[] {2, 2}),
                                 "ARRAY_EXCEPT(f6, ARRAY[2, 2])",
-                                new Integer[] {1, 3, 4, 2},
+                                new Integer[] {1, 3, 4},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         // arrayTwo is NULL
                         .testResult(
@@ -1663,7 +1663,7 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
                         .testResult(
                                 $("f0").arrayExcept(new Integer[] {null, 2}),
                                 "ARRAY_EXCEPT(f0, ARRAY[null, 2])",
-                                new Integer[] {1, 2},
+                                new Integer[] {1},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         // arrayOne is NULL
                         .testResult(
@@ -1675,7 +1675,7 @@ class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
                         .testResult(
                                 $("f3").arrayExcept(new Integer[] {null, 42}),
                                 "ARRAY_EXCEPT(f3, ARRAY[null, 42])",
-                                new Integer[] {null, 1},
+                                new Integer[] {1},
                                 DataTypes.ARRAY(DataTypes.INT()).nullable())
                         // ARRAY<ROW<BOOLEAN, DATE>>
                         .testResult(
diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java
index 98125730214..68ffb5dbe4e 100644
--- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java
+++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayExceptFunction.java
@@ -33,9 +33,9 @@ import org.apache.flink.util.FlinkRuntimeException;
 import javax.annotation.Nullable;
 
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
+import java.util.Set;
 
 /** Implementation of {@link BuiltInFunctionDefinitions#ARRAY_EXCEPT}. */
 @Internal
@@ -65,19 +65,18 @@ public class ArrayExceptFunction extends BuiltInScalarFunction {
             }
 
             List<Object> list = new ArrayList<>();
-            Map<ObjectContainer, Integer> map = new HashMap<>();
+            Set<ObjectContainer> set = new HashSet<>();
             for (int pos = 0; pos < arrayTwo.size(); pos++) {
                 final Object element = 
elementGetter.getElementOrNull(arrayTwo, pos);
                 final ObjectContainer objectContainer = 
createObjectContainer(element);
-                map.merge(objectContainer, 1, (k, v) -> v + 1);
+                set.add(objectContainer);
             }
             for (int pos = 0; pos < arrayOne.size(); pos++) {
                 final Object element = 
elementGetter.getElementOrNull(arrayOne, pos);
                 final ObjectContainer objectContainer = 
createObjectContainer(element);
-                if (map.containsKey(objectContainer)) {
-                    map.compute(objectContainer, (k, v) -> v == null || v == 1 
? null : v - 1);
-                } else {
+                if (!set.contains(objectContainer)) {
                     list.add(element);
+                    set.add(objectContainer);
                 }
             }
             return new GenericArrayData(list.toArray());

Reply via email to