This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9a27d8490a feat: column support for `array_append`, `array_prepend`, 
`array_position` and `array_positions` (#6805)
9a27d8490a is described below

commit 9a27d8490a04afec20d607d4880a1a465ba6b783
Author: Igor Izvekov <[email protected]>
AuthorDate: Wed Jul 5 22:34:18 2023 +0300

    feat: column support for `array_append`, `array_prepend`, `array_position` 
and `array_positions` (#6805)
    
    * test: sqllogictests with columns for array_append, array_prepend, 
array_position and array_positions
    
    * feat: column support for array_append and array_prepend
    
    * feat: column support for array_position and array_positions
    
    * fix: error type
---
 .../core/tests/sqllogictests/test_files/array.slt  | 491 ++++++++++++++-------
 datafusion/expr/src/built_in_function.rs           |   4 +-
 datafusion/physical-expr/src/array_expressions.rs  | 338 ++++++++------
 datafusion/physical-expr/src/functions.rs          |   8 +-
 4 files changed, 541 insertions(+), 300 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt 
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 1f43c5f8e1..7eebb23d9c 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -19,108 +19,294 @@
 ## Array expressions Tests
 #############
 
+
+### Tables
+
+
+statement ok
+CREATE TABLE values(
+  a INT,
+  b INT,
+  c INT,
+  d FLOAT,
+  e VARCHAR
+) AS VALUES
+  (1, 1, 2, 1.1, 'Lorem'),
+  (2, 3, 4, 2.2, 'ipsum'),
+  (3, 5, 6, 3.3, 'dolor'),
+  (4, 7, 8, 4.4, 'sit'),
+  (NULL, 9, 10, 5.5, 'amet'),
+  (5, NULL, 12, 6.6, ','),
+  (6, 11, NULL, 7.7, 'consectetur'),
+  (7, 13, 14, NULL, 'adipiscing'),
+  (8, 15, 16, 8.8, NULL)
+;
+
+statement ok
+CREATE TABLE arrays
+AS VALUES
+  (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 
3.3), make_array('L', 'o', 'r', 'e', 'm')),
+  (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), 
make_array('i', 'p', NULL, 'u', 'm')),
+  (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), 
make_array('d', NULL, 'l', 'o', 'r')),
+  (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 
12.2), make_array('s', 'i', 't')),
+  (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
+  (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
+  (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 
18.8), NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values
+AS VALUES
+  (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'),
+  (NULL, 44, 5, '@'),
+  (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'),
+  (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'),
+  (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values_without_nulls
+AS VALUES
+  (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+;
+
+# arrays table
+query ???
+select column1, column2, column3 from arrays;
+----
+[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m]
+[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m]
+[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r]
+[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t]
+NULL [13.3, 14.4, 15.5] [a, m, e, t]
+[[11, 12], [13, 14]] NULL [,]
+[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL
+
+# values table
+query IIIRT
+select a, b, c, d, e from values;
+----
+1 1 2 1.1 Lorem
+2 3 4 2.2 ipsum
+3 5 6 3.3 dolor
+4 7 8 4.4 sit
+NULL 9 10 5.5 amet
+5 NULL 12 6.6 ,
+6 11 NULL 7.7 consectetur
+7 13 14 NULL adipiscing
+8 15 16 8.8 NULL
+
+# arrays_values table
+query ?IIT
+select column1, column2, column3, column4 from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 .
+[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok
+NULL 44 5 @
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $
+[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL
+
+# arrays_values_without_nulls table
+query ?II
+select column1, column2, column3 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+
+### Array function tests
+
+
 ## make_array
 
-# array scalar function #1
-query ??? rowsort
+# make_array scalar function #1
+query ???
 select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 
'l', 'l', 'o');
 ----
 [1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o]
 
-# array scalar function #2
-query ??? rowsort
+# make_array scalar function #2
+query ???
 select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), 
make_array([[[[1], [2]]]]);
 ----
 [1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]]
 
-# array scalar function #3
-query ?? rowsort
+# make_array scalar function #3
+query ??
 select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 
4]], [[5, 6], [7, 8]]);
 ----
 [[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 
-# array scalar function #4
-query ?? rowsort
+# make_array scalar function #4
+query ??
 select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o');
 ----
 [[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o]
 
-# array scalar function #5
-query ? rowsort
+# make_array scalar function #5
+query ?
 select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 
6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12))))
 ----
 [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
 
-# array scalar function #6
-query ? rowsort
+# make_array scalar function #6
+query ?
 select make_array()
 ----
 []
 
-# array scalar function #7
-query ?? rowsort
+# make_array scalar function #7
+query ??
 select make_array(make_array()), make_array(make_array(make_array()))
 ----
 [[]] [[[]]]
 
-# array scalar function with nulls
-query ??? rowsort
+# make_array scalar function with nulls
+query ???
 select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', 
NULL, 'l', NULL, 'o');
 ----
 [1, , 3] [, 2.0, ] [h, , l, , o]
 
-# array scalar function with nulls #2
-query ?? rowsort
+# make_array scalar function with nulls #2
+query ??
 select make_array(1, 2, NULL), make_array(make_array(NULL, 2), 
make_array(NULL, 3));
 ----
 [1, 2, ] [[, 2], [, 3]]
 
-# array scalar function with nulls #3
-query ??? rowsort
+# make_array scalar function with nulls #3
+query ???
 select make_array(NULL), make_array(NULL, NULL, NULL), 
make_array(make_array(NULL, NULL), make_array(NULL, NULL));
 ----
 [] [] [[], []]
 
+# make_array with columns #1
+query ????
+select make_array(a), make_array(b, c), make_array(d), make_array(e) from 
values;
+----
+[1] [1, 2] [1.1] [Lorem]
+[2] [3, 4] [2.2] [ipsum]
+[3] [5, 6] [3.3] [dolor]
+[4] [7, 8] [4.4] [sit]
+[0] [9, 10] [5.5] [amet]
+[5] [0, 12] [6.6] [,]
+[6] [11, 0] [7.7] [consectetur]
+[7] [13, 14] [0.0] [adipiscing]
+[8] [15, 16] [8.8] []
+
+# make_array with columns #2
+query ?
+select make_array(a, b, c, d) from values;
+----
+[1.0, 1.0, 2.0, 1.1]
+[2.0, 3.0, 4.0, 2.2]
+[3.0, 5.0, 6.0, 3.3]
+[4.0, 7.0, 8.0, 4.4]
+[0.0, 9.0, 10.0, 5.5]
+[5.0, 0.0, 12.0, 6.6]
+[6.0, 11.0, 0.0, 7.7]
+[7.0, 13.0, 14.0, 0.0]
+[8.0, 15.0, 16.0, 8.8]
+
 ## array_append
 
 # array_append scalar function #2
-query ? rowsort
+query ?
 select array_append(make_array(), 4);
 ----
 [4]
 
 # array_append scalar function #2
-query ?? rowsort
+query ??
 select array_append(make_array(), make_array()), array_append(make_array(), 
make_array(4));
 ----
 [[]] [[4]]
 
 # array_append scalar function #3
-query ??? rowsort
+query ???
 select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 
3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
 ----
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
+# array_append with columns
+query ?
+select array_append(column1, column2) from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]
+[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12]
+[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23]
+[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34]
+[44]
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ]
+[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55]
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66]
+
+# array_append with columns and scalars
+query ??
+select array_append(column2, 100.1), array_append(column3, '.') from arrays;
+----
+[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .]
+[, 5.5, 6.6, 100.1] [i, p, , u, m, .]
+[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .]
+[10.1, , 12.2, 100.1] [s, i, t, .]
+[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
+[100.1] [,, .]
+[16.6, 17.7, 18.8, 100.1] [.]
+
 ## array_prepend
 
 # array_prepend scalar function #1
-query ? rowsort
+query ?
 select array_prepend(4, make_array());
 ----
 [4]
 
 # array_prepend scalar function #2
-query ?? rowsort
+query ??
 select array_prepend(make_array(), make_array()), array_prepend(make_array(4), 
make_array());
 ----
 [[]] [[4]]
 
 # array_prepend scalar function #3
-query ??? rowsort
+query ???
 select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, 
make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
 ----
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
+# array_prepend with columns
+query ?
+select array_prepend(column2, column1) from arrays_values;
+----
+[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40]
+[44]
+[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
+# array_prepend with columns and scalars
+query ??
+select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
+----
+[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m]
+[100.1, , 5.5, 6.6] [., i, p, , u, m]
+[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r]
+[100.1, 10.1, , 12.2] [., s, i, t]
+[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
+[100.1] [., ,]
+[100.1, 16.6, 17.7, 18.8] [.]
+
 ## array_fill
 
 # array_fill scalar function #1
@@ -144,37 +330,37 @@ select array_fill(1, make_array())
 ## array_concat
 
 # array_concat scalar function #1
-query ?? rowsort
+query ??
 select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 
9)), array_concat(make_array([1], [2]), make_array([3], [4]));
 ----
 [1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
 
 # array_concat scalar function #2
-query ? rowsort
+query ?
 select array_concat(make_array(make_array(1, 2), make_array(3, 4)), 
make_array(make_array(5, 6), make_array(7, 8)));
 ----
 [[1, 2], [3, 4], [5, 6], [7, 8]]
 
 # array_concat scalar function #3
-query ? rowsort
+query ?
 select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), 
make_array([7], [8], [9]));
 ----
 [[1], [2], [3], [4], [5], [6], [7], [8], [9]]
 
 # array_concat scalar function #4
-query ? rowsort
+query ?
 select array_concat(make_array([[1]]), make_array([[2]]));
 ----
 [[[1]], [[2]]]
 
 # array_concat scalar function #5
-query ? rowsort
+query ?
 select array_concat(make_array(2, 3), make_array());
 ----
 [2, 3]
 
 # array_concat scalar function #6
-query ? rowsort
+query ?
 select array_concat(make_array(), make_array(2, 3));
 ----
 [2, 3]
@@ -193,12 +379,50 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), 
array_position([1, 2,
 ----
 4 5 2
 
+# array_position with columns
+query II
+select array_position(column1, column2), array_position(column1, column2, 
column3) from arrays_values_without_nulls;
+----
+1 1
+2 2
+3 3
+4 4
+
+# array_position with columns and scalars
+query II
+select array_position(column1, 3), array_position(column1, 3, 5) from 
arrays_values_without_nulls;
+----
+3 NULL
+NULL NULL
+NULL NULL
+NULL NULL
+
+## array_positions
+
 # array_positions scalar function
-query ??? rowsort
+query ???
 select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 
3, 4, 5], 5), array_positions([1, 1, 1], 1);
 ----
 [3, 4] [5] [1, 2, 3]
 
+# array_positions with columns
+query ?
+select array_positions(column1, column2) from arrays_values_without_nulls;
+----
+[1]
+[2]
+[3]
+[4]
+
+# array_positions with columns and scalars
+query ??
+select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 
45], column2) from arrays_values_without_nulls;
+----
+[4] [1]
+[] []
+[] [3]
+[] []
+
 ## array_replace
 
 # array_replace scalar function
@@ -210,7 +434,7 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), 
array_replace(make_array(1,
 ## array_to_string
 
 # array_to_string scalar function
-query TTT rowsort
+query TTT
 select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 
3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|');
 ----
 h,e,l,l,o 1-2-3-4-5 1|2|3
@@ -228,13 +452,13 @@ Error during planning: Cannot automatically convert Utf8 
to List\(Field \{ name:
 select array_to_string(make_array(), ',')
 
 # array_to_string scalar function with nulls #1
-query TTT rowsort
+query TTT
 select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), 
array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), 
array_to_string(make_array(NULL, 2.0, 3.0), '|');
 ----
 h,l,o 1-3-5 2|3
 
 # array_to_string scalar function with nulls #2
-query TTT rowsort
+query TTT
 select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), 
array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), 
array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
 ----
 h,-,-,-,o nil-2-nil-4-5 1|0|3
@@ -288,19 +512,19 @@ select trim_array(make_array(), 0), 
trim_array(make_array(), 1)
 ## array_length
 
 # array_length scalar function
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 
3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
 ----
 5 3 3
 
 # array_length scalar function #2
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1);
 ----
 5 3 3
 
 # array_length scalar function #3
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 
2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2);
 ----
 NULL NULL 2
@@ -312,7 +536,7 @@ Error during planning: Cannot automatically convert 
List\(Field \{ name: "item",
 select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, 
[3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), 
array_length(array_fill(3, [3, 2, 5]), 4);
 
 # array_length scalar function #5
-query III rowsort
+query III
 select array_length(make_array()), array_length(make_array(), 1), 
array_length(make_array(), 2)
 ----
 0 0 NULL
@@ -337,14 +561,14 @@ caused by
 Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to UInt8
 select array_dims(make_array()), array_dims(make_array(make_array()))
 
+## array_ndims
+
 # array_ndims scalar function
-query III rowsort
+query III
 select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 
4])), array_ndims(make_array([[[[1], [2]]]]));
 ----
 1 2 5
 
-## array_ndims
-
 # array_ndims scalar function #2
 query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
 caused by
@@ -352,183 +576,106 @@ Error during planning: Cannot automatically convert 
List\(Field \{ name: "item",
 select array_ndims(array_fill(1, [1, 2, 3])), 
array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
 
 # array_ndims scalar function #3
-query II rowsort
+query II
 select array_ndims(make_array()), array_ndims(make_array(make_array()))
 ----
 1 2
 
-## array concatenate opeartor
-
-# array concatenate operator #1 (like array_concat scalar function)
-query ?? rowsort
-select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), 
make_array([1], [2]) || make_array([3], [4]);
-----
-[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
-
-# array concatenate operator #2 (like array_append scalar function)
-query ??? rowsort
-select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, 
make_array('h', 'e', 'l', 'l') || 'o';
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# array concatenate operator #3 (like array_prepend scalar function)
-query ??? rowsort
-select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || 
make_array('e', 'l', 'l', 'o');
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# make_array
-
-query ?
-select make_array(1, 2.0)
-----
-[1.0, 2.0]
-
-query ?
-select make_array(null, 1.0)
-----
-[, 1.0]
-
-query ?
-select make_array(1, 2.0, null, 3)
-----
-[1.0, 2.0, , 3.0]
-
-query ?
-select make_array(1.0, '2', null)
-----
-[1.0, 2, ]
-
-statement ok
-create table foo1 (x int, y double) as values (1, 2.0);
-
-query ?
-select make_array(x, y) from foo1;
-----
-[1.0, 2.0]
-
-statement ok
-create table foo2 (x float, y varchar) as values (1.0, '1');
-
-query ?
-select make_array(x, y) from foo2;
-----
-[1.0, 1]
-
-# array_contains
+## array_contains
 
 # array_contains scalar function #1
-query BBB rowsort
+query BBB
 select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), 
array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
 ----
 true true true
 
 # array_contains scalar function #2
-query BB rowsort
+query BB
 select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), 
array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
 ----
 true true
 
 # array_contains scalar function #3
-query BBB rowsort
+query BBB
 select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), 
array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
 ----
 false false false
 
 # array_contains scalar function #4
-query BB rowsort
+query BB
 select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), 
array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
 ----
 false false
 
 # array_contains scalar function #5
-query BB rowsort
+query BB
 select array_contains([true, true, false, true, false], [true, false, false]), 
array_contains([true, false, true], [true, true]);
 ----
 true true
 
 # array_contains scalar function #6
-query BB rowsort
+query BB
 select array_contains(make_array(true, true, true), make_array(false, false)), 
array_contains([false, false, false], [true, true]);
 ----
 false false
 
-## array_contains array
 
+### Array operators tests
 
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(1,2,3), make_array(1,2,3)),
-(make_array(1,2,3), make_array(2,3)),
-(make_array(2,3),   make_array(1,2,3)),
-(null,              make_array(1,2,3)),
-(make_array(2,3),   null)
-;
 
+## array concatenate operator
 
+# array concatenate operator with scalars #1 (like array_concat scalar 
function)
 query ??
-SELECT
-  column1,
-  column2
-FROM t
+select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), 
make_array([1], [2]) || make_array([3], [4]);
 ----
-[1, 2, 3] [1, 2, 3]
-[1, 2, 3] [2, 3]
-[2, 3] [1, 2, 3]
-NULL [1, 2, 3]
-[2, 3] NULL
-
+[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
 
-# incorrect answer (one row) to 
https://github.com/apache/arrow-datafusion/issues/6709
+# array concatenate operator with scalars #2 (like array_append scalar 
function)
+query ???
+select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, 
make_array('h', 'e', 'l', 'l') || 'o';
+----
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
-query BB
-SELECT
-  array_contains(column1, column2) as c12,
-  array_contains(column1, column2) as c21
-FROM t
+# array concatenate operator with scalars #3 (like array_prepend scalar 
function)
+query ???
+select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || 
make_array('e', 'l', 'l', 'o');
 ----
-true true
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
 
-statement ok
-drop table t
+### Array casting tests
 
 
-## array_contains array (nested)
+## make_array
 
+# make_array scalar function #1
+query ?
+select make_array(1, 2.0)
+----
+[1.0, 2.0]
 
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(make_array(2),make_array(3,4)),   make_array(3,4)),
-(make_array(make_array(2),make_array(3,4)),   make_array(4,3)),
-(make_array(make_array(2),make_array(3,4)),   make_array(3)),
-(make_array(make_array(2),make_array(3,4)),   make_array(2))
-;
+# make_array scalar function #2
+query ?
+select make_array(null, 1.0)
+----
+[, 1.0]
 
+# make_array scalar function #3
+query ?
+select make_array(1, 2.0, null, 3)
+----
+[1.0, 2.0, , 3.0]
 
-query ??
-SELECT
-  column1,
-  column2
-FROM t
+# make_array scalar function #4
+query ?
+select make_array(1.0, '2', null)
 ----
-[[2], [3, 4]] [3, 4]
-[[2], [3, 4]] [4, 3]
-[[2], [3, 4]] [3]
-[[2], [3, 4]] [2]
+[1.0, 2, ]
 
 
-# incorrect answer (one row) to 
https://github.com/apache/arrow-datafusion/issues/6709
+### FixedSizeListArray
 
-query BB
-SELECT
-  array_contains(column1, column2) as c12,
-  array_contains(column1, column2) as c21
-FROM t
-----
-true true
 
 statement ok
 CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 
'tests/data/fixed_size_list_array.parquet';
@@ -566,3 +713,19 @@ query ?
 select make_array(f0) from fixed_size_list_array
 ----
 [[1, 2], [3, 4]]
+
+
+### Delete tables
+
+
+statement ok
+drop table values;
+
+statement ok
+drop table arrays;
+
+statement ok
+drop table arrays_values;
+
+statement ok
+drop table arrays_values_without_nulls;
diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 6905462275..103f1047fa 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -488,9 +488,9 @@ impl BuiltinScalarFunction {
             )))),
             BuiltinScalarFunction::ArrayLength => Ok(UInt8),
             BuiltinScalarFunction::ArrayNdims => Ok(UInt8),
-            BuiltinScalarFunction::ArrayPosition => Ok(UInt8),
+            BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
             BuiltinScalarFunction::ArrayPositions => {
-                Ok(List(Arc::new(Field::new("item", UInt8, true))))
+                Ok(List(Arc::new(Field::new("item", UInt64, true))))
             }
             BuiltinScalarFunction::ArrayPrepend => Ok(List(Arc::new(Field::new(
                 "item",
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index bddeef526a..cd174918db 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -18,7 +18,7 @@
 //! Array expressions
 
 use arrow::array::*;
-use arrow::buffer::Buffer;
+use arrow::buffer::{Buffer, OffsetBuffer};
 use arrow::compute;
 use arrow::datatypes::{DataType, Field};
 use core::any::type_name;
@@ -197,15 +197,53 @@ pub fn make_array(values: &[ColumnarValue]) -> 
Result<ColumnarValue> {
 
 macro_rules! append {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array($ELEMENT.data_type()), 
$ARRAY_TYPE).clone();
+
         let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
-        let cat = compute::concat(&[child_array, element])?;
-        let mut scalars = vec![];
-        for i in 0..cat.len() {
-            
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+        for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be 
empty",))
+            })?;
+            match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            child_array,
+                            &$ARRAY_TYPE::from(vec![el])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + child_array.len() as i32 + 
1i32);
+                }
+                None => {
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el.clone()])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + 1i32);
+                }
+            }
         }
-        scalars
+
+        let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), 
true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
@@ -221,7 +259,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
     let arr = as_list_array(&args[0])?;
     let element = &args[1];
 
-    let scalars = match (arr.value_type(), element.data_type()) {
+    let res = match (arr.value_type(), element.data_type()) {
                 (DataType::Utf8, DataType::Utf8) => append!(arr, element, 
StringArray),
                 (DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr, 
element, LargeStringArray),
                 (DataType::Boolean, DataType::Boolean) => append!(arr, 
element, BooleanArray),
@@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) -> 
Result<ArrayRef> {
                 }
     };
 
-    Ok(array(scalars.as_slice())?.into_array(1))
+    Ok(res)
 }
 
 macro_rules! prepend {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array($ELEMENT.data_type()), 
$ARRAY_TYPE).clone();
+
         let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
-        let cat = compute::concat(&[element, child_array])?;
-        let mut scalars = vec![];
-        for i in 0..cat.len() {
-            
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+        for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be 
empty",))
+            })?;
+            match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el]),
+                            child_array
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + child_array.len() as i32 + 
1i32);
+                }
+                None => {
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el.clone()])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + 1i32);
+                }
+            }
         }
-        scalars
+
+        let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), 
true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
@@ -272,7 +348,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef> 
{
     let element = &args[0];
     let arr = as_list_array(&args[1])?;
 
-    let scalars = match (arr.value_type(), element.data_type()) {
+    let res = match (arr.value_type(), element.data_type()) {
                 (DataType::Utf8, DataType::Utf8) => prepend!(arr, element, 
StringArray),
                 (DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr, 
element, LargeStringArray),
                 (DataType::Boolean, DataType::Boolean) => prepend!(arr, 
element, BooleanArray),
@@ -294,7 +370,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef> 
{
                 }
     };
 
-    Ok(array(scalars.as_slice())?.into_array(1))
+    Ok(res)
 }
 
 /// Array_concat/Array_cat SQL function
@@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
 
 macro_rules! position {
     ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        match child_array
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        $ARRAY
             .iter()
-            .skip($INDEX)
-            .position(|x| x == Some(element))
-        {
-            Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some(
-                (value + $INDEX + 1) as u8,
-            )))),
-            None => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
-        }
+            .zip(element.iter())
+            .zip($INDEX.iter())
+            .map(|((arr, el), i)| {
+                let index = match i {
+                    Some(i) => {
+                        if i <= 0 {
+                            0
+                        } else {
+                            i - 1
+                        }
+                    }
+                    None => {
+                        return Err(DataFusionError::Execution(
+                            "initial position must not be null".to_string(),
+                        ))
+                    }
+                };
+
+                match arr {
+                    Some(arr) => {
+                        let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+
+                        match child_array
+                            .iter()
+                            .skip(index as usize)
+                            .position(|x| x == el)
+                        {
+                            Some(value) => Ok(Some(value as u64 + index as u64 
+ 1u64)),
+                            None => Ok(None),
+                        }
+                    }
+                    None => Ok(None),
+                }
+            })
+            .collect::<Result<UInt64Array>>()?
     }};
 }
 
 /// Array_position SQL function
-pub fn array_position(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arr = match &args[0] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        ColumnarValue::Array(arr) => arr.clone(),
-    };
-
-    let element = match &args[1] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        _ => {
-            return Err(DataFusionError::Internal(
-                "Array_position function requires scalar element".to_string(),
-            ))
-        }
-    };
+pub fn array_position(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let arr = as_list_array(&args[0])?;
+    let element = &args[1];
 
-    let mut index: usize = 0;
+    let mut index = Int64Array::from_value(0, arr.len());
     if args.len() == 3 {
-        let scalar =
-            match &args[2] {
-                ColumnarValue::Scalar(scalar) => scalar.clone(),
-                _ => return Err(DataFusionError::Internal(
-                    "Array_position function requires positive integer scalar 
element"
-                        .to_string(),
-                )),
-            };
-
-        index =
-            match scalar {
-                ScalarValue::Int8(Some(value)) => value as usize,
-                ScalarValue::Int16(Some(value)) => value as usize,
-                ScalarValue::Int32(Some(value)) => value as usize,
-                ScalarValue::Int64(Some(value)) => value as usize,
-                ScalarValue::UInt8(Some(value)) => value as usize,
-                ScalarValue::UInt16(Some(value)) => value as usize,
-                ScalarValue::UInt32(Some(value)) => value as usize,
-                ScalarValue::UInt64(Some(value)) => value as usize,
-                _ => return Err(DataFusionError::Internal(
-                    "Array_position function requires positive integer scalar 
element"
-                        .to_string(),
-                )),
-            };
-
-        if index == 0 {
-            index = 0;
-        } else {
-            index -= 1;
-        }
+        index = as_int64_array(&args[2])?.clone();
     }
 
-    match arr.data_type() {
+    let res = match arr.data_type() {
         DataType::List(field) => match field.data_type() {
             DataType::Utf8 => position!(arr, element, index, StringArray),
             DataType::LargeUtf8 => position!(arr, element, index, 
LargeStringArray),
@@ -502,50 +562,75 @@ pub fn array_position(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
             DataType::UInt16 => position!(arr, element, index, UInt16Array),
             DataType::UInt32 => position!(arr, element, index, UInt32Array),
             DataType::UInt64 => position!(arr, element, index, UInt64Array),
-            data_type => Err(DataFusionError::NotImplemented(format!(
-                "Array_position is not implemented for types '{data_type:?}'."
-            ))),
+            data_type => {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Array_position is not implemented for types 
'{data_type:?}'."
+                )))
+            }
         },
-        data_type => Err(DataFusionError::NotImplemented(format!(
-            "Array is not type '{data_type:?}'."
-        ))),
-    }
+        data_type => {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Array is not type '{data_type:?}'."
+            )))
+        }
+    };
+
+    Ok(Arc::new(res))
 }
 
 macro_rules! positions {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        let mut res = vec![];
-        for (i, x) in child_array.iter().enumerate() {
-            if x == Some(element) {
-                res.push(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![
-                    Some((i + 1) as u8),
-                ]))));
-            }
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array(&DataType::UInt64), 
UInt64Array).clone();
+        for comp in $ARRAY
+            .iter()
+            .zip(element.iter())
+            .map(|(arr, el)| match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    let res = child_array
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, x)| *x == el)
+                        .flat_map(|(i, _)| Some((i + 1) as u64))
+                        .collect::<UInt64Array>();
+
+                    Ok(res)
+                }
+                None => Ok(downcast_arg!(
+                    new_empty_array(&DataType::UInt64),
+                    UInt64Array
+                )
+                .clone()),
+            })
+            .collect::<Result<Vec<UInt64Array>>>()?
+        {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be 
empty",))
+            })?;
+            values =
+                downcast_arg!(compute::concat(&[&values, &comp,])?.clone(), 
UInt64Array)
+                    .clone();
+            offsets.push(last_offset + comp.len() as i32);
         }
 
-        res
+        let field = Arc::new(Field::new("item", DataType::UInt64, true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
 /// Array_positions SQL function
-pub fn array_positions(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arr = match &args[0] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        ColumnarValue::Array(arr) => arr.clone(),
-    };
-
-    let element = match &args[1] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        _ => {
-            return Err(DataFusionError::Internal(
-                "Array_positions function requires scalar element".to_string(),
-            ))
-        }
-    };
+pub fn array_positions(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let arr = as_list_array(&args[0])?;
+    let element = &args[1];
 
     let res = match arr.data_type() {
         DataType::List(field) => match field.data_type() {
@@ -575,7 +660,7 @@ pub fn array_positions(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
         }
     };
 
-    array(res.as_slice())
+    Ok(res)
 }
 
 macro_rules! remove {
@@ -1465,29 +1550,22 @@ mod tests {
     #[test]
     fn test_array_position() {
         // array_position([1, 2, 3, 4], 3) = 3
-        let list_array = return_array();
-        let array = array_position(&[
-            list_array,
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
-        ])
-        .expect("failed to initialize function array_position")
-        .into_array(1);
-        let result =
-            as_uint8_array(&array).expect("failed to initialize function 
array_position");
+        let list_array = return_array().into_array(1);
+        let array = array_position(&[list_array, 
Arc::new(Int64Array::from_value(3, 1))])
+            .expect("failed to initialize function array_position");
+        let result = as_uint64_array(&array)
+            .expect("failed to initialize function array_position");
 
-        assert_eq!(result, &UInt8Array::from(vec![3]));
+        assert_eq!(result, &UInt64Array::from(vec![3]));
     }
 
     #[test]
     fn test_array_positions() {
         // array_positions([1, 2, 3, 4], 3) = [3]
-        let list_array = return_array();
-        let array = array_positions(&[
-            list_array,
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
-        ])
-        .expect("failed to initialize function array_position")
-        .into_array(1);
+        let list_array = return_array().into_array(1);
+        let array =
+            array_positions(&[list_array, Arc::new(Int64Array::from_value(3, 
1))])
+                .expect("failed to initialize function array_position");
         let result =
             as_list_array(&array).expect("failed to initialize function 
array_position");
 
@@ -1497,7 +1575,7 @@ mod tests {
             result
                 .value(0)
                 .as_any()
-                .downcast_ref::<UInt8Array>()
+                .downcast_ref::<UInt64Array>()
                 .unwrap()
                 .values()
         );
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index 08916d89c9..215582a1a8 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -423,11 +423,11 @@ pub fn create_physical_fun(
             Arc::new(|args| 
make_scalar_function(array_expressions::array_ndims)(args))
         }
         BuiltinScalarFunction::ArrayPosition => {
-            Arc::new(array_expressions::array_position)
-        }
-        BuiltinScalarFunction::ArrayPositions => {
-            Arc::new(array_expressions::array_positions)
+            Arc::new(|args| 
make_scalar_function(array_expressions::array_position)(args))
         }
+        BuiltinScalarFunction::ArrayPositions => Arc::new(|args| {
+            make_scalar_function(array_expressions::array_positions)(args)
+        }),
         BuiltinScalarFunction::ArrayPrepend => {
             Arc::new(|args| 
make_scalar_function(array_expressions::array_prepend)(args))
         }


Reply via email to