alamb commented on code in PR #6805:
URL: https://github.com/apache/arrow-datafusion/pull/6805#discussion_r1253391552


##########
datafusion/expr/src/built_in_function.rs:
##########
@@ -482,9 +482,9 @@ impl BuiltinScalarFunction {
             )))),
             BuiltinScalarFunction::ArrayLength => Ok(UInt8),
             BuiltinScalarFunction::ArrayNdims => Ok(UInt8),
-            BuiltinScalarFunction::ArrayPosition => Ok(UInt8),
+            BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
             BuiltinScalarFunction::ArrayPositions => {
-                Ok(List(Arc::new(Field::new("item", UInt8, true))))
+                Ok(List(Arc::new(Field::new("item", UInt64, true))))

Review Comment:
   👍 



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) -> 
Result<ArrayRef> {
                 }
     };
 
-    Ok(array(scalars.as_slice())?.into_array(1))
+    Ok(res)
 }
 
 macro_rules! prepend {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array($ELEMENT.data_type()), 
$ARRAY_TYPE).clone();
+
         let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
-        let cat = compute::concat(&[element, child_array])?;
-        let mut scalars = vec![];
-        for i in 0..cat.len() {
-            
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+        for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be 
empty",))
+            })?;
+            match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    values = downcast_arg!(
+                        compute::concat(&[

Review Comment:
   When concatenating two arrays I don't think there is any difference.
   
   however, you can see in the `concat` implementation it uses 
`MutableArrayData`: 
https://docs.rs/arrow-select/43.0.0/src/arrow_select/concat.rs.html#86-91 -- 
maybe that is an approach to pursue?
   
   @tustvold  do you know of any reason we can't add a `info_builder` method 
for `GenericListArray`? If not, I'll file a ticket request and see if anyone in 
the community wants to try.



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
 
 macro_rules! position {
     ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        match child_array
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        $ARRAY
             .iter()
-            .skip($INDEX)
-            .position(|x| x == Some(element))
-        {
-            Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some(
-                (value + $INDEX + 1) as u8,
-            )))),
-            None => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
-        }
+            .zip(element.iter())
+            .zip($INDEX.iter())
+            .map(|((arr, el), i)| {
+                let index = match i {
+                    Some(i) => {
+                        if i <= 0 {
+                            0
+                        } else {
+                            i - 1
+                        }
+                    }
+                    None => {
+                        return Err(DataFusionError::Internal(

Review Comment:
   I think this error can be generated by bad user input (and not a bug in 
DataFusion) so I don't think it should be an internal error:
   
   ```suggestion
                           return Err(DataFusionError::Execution(
   ```



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
 
 macro_rules! position {
     ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), 
$ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        match child_array
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        $ARRAY
             .iter()
-            .skip($INDEX)
-            .position(|x| x == Some(element))
-        {
-            Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some(
-                (value + $INDEX + 1) as u8,
-            )))),
-            None => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
-        }
+            .zip(element.iter())
+            .zip($INDEX.iter())
+            .map(|((arr, el), i)| {
+                let index = match i {
+                    Some(i) => {
+                        if i <= 0 {
+                            0
+                        } else {
+                            i - 1
+                        }
+                    }
+                    None => {
+                        return Err(DataFusionError::Internal(
+                            "initial position must not be null".to_string(),

Review Comment:
   I think following postgres seems reasonable to me
   



##########
datafusion/core/tests/sqllogictests/test_files/array.slt:
##########
@@ -19,108 +19,294 @@
 ## Array expressions Tests
 #############
 
+
+### Tables
+
+
+statement ok
+CREATE TABLE values(
+  a INT,
+  b INT,
+  c INT,
+  d FLOAT,
+  e VARCHAR
+) AS VALUES
+  (1, 1, 2, 1.1, 'Lorem'),
+  (2, 3, 4, 2.2, 'ipsum'),
+  (3, 5, 6, 3.3, 'dolor'),
+  (4, 7, 8, 4.4, 'sit'),
+  (NULL, 9, 10, 5.5, 'amet'),
+  (5, NULL, 12, 6.6, ','),
+  (6, 11, NULL, 7.7, 'consectetur'),
+  (7, 13, 14, NULL, 'adipiscing'),
+  (8, 15, 16, 8.8, NULL)
+;
+
+statement ok
+CREATE TABLE arrays
+AS VALUES
+  (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 
3.3), make_array('L', 'o', 'r', 'e', 'm')),
+  (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), 
make_array('i', 'p', NULL, 'u', 'm')),
+  (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), 
make_array('d', NULL, 'l', 'o', 'r')),
+  (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 
12.2), make_array('s', 'i', 't')),
+  (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
+  (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
+  (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 
18.8), NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values
+AS VALUES
+  (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'),
+  (NULL, 44, 5, '@'),
+  (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'),
+  (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'),
+  (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values_without_nulls
+AS VALUES
+  (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+;
+
+# arrays table
+query ???
+select column1, column2, column3 from arrays;
+----
+[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m]
+[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m]
+[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r]
+[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t]
+NULL [13.3, 14.4, 15.5] [a, m, e, t]
+[[11, 12], [13, 14]] NULL [,]
+[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL
+
+# values table
+query IIIRT
+select a, b, c, d, e from values;
+----
+1 1 2 1.1 Lorem
+2 3 4 2.2 ipsum
+3 5 6 3.3 dolor
+4 7 8 4.4 sit
+NULL 9 10 5.5 amet
+5 NULL 12 6.6 ,
+6 11 NULL 7.7 consectetur
+7 13 14 NULL adipiscing
+8 15 16 8.8 NULL
+
+# arrays_values table
+query ?IIT
+select column1, column2, column3, column4 from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 .
+[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok
+NULL 44 5 @
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $
+[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL
+
+# arrays_values_without_nulls table
+query ?II
+select column1, column2, column3 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+
+### Array function tests
+
+
 ## make_array
 
-# array scalar function #1
-query ??? rowsort
+# make_array scalar function #1
+query ???
 select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 
'l', 'l', 'o');
 ----
 [1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o]
 
-# array scalar function #2
-query ??? rowsort
+# make_array scalar function #2
+query ???
 select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), 
make_array([[[[1], [2]]]]);
 ----
 [1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]]
 
-# array scalar function #3
-query ?? rowsort
+# make_array scalar function #3
+query ??
 select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 
4]], [[5, 6], [7, 8]]);
 ----
 [[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 
-# array scalar function #4
-query ?? rowsort
+# make_array scalar function #4
+query ??
 select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o');
 ----
 [[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o]
 
-# array scalar function #5
-query ? rowsort
+# make_array scalar function #5
+query ?
 select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 
6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12))))
 ----
 [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
 
-# array scalar function #6
-query ? rowsort
+# make_array scalar function #6
+query ?
 select make_array()
 ----
 []
 
-# array scalar function #7
-query ?? rowsort
+# make_array scalar function #7
+query ??
 select make_array(make_array()), make_array(make_array(make_array()))
 ----
 [[]] [[[]]]
 
-# array scalar function with nulls
-query ??? rowsort
+# make_array scalar function with nulls
+query ???
 select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', 
NULL, 'l', NULL, 'o');
 ----
 [1, , 3] [, 2.0, ] [h, , l, , o]
 
-# array scalar function with nulls #2
-query ?? rowsort
+# make_array scalar function with nulls #2
+query ??
 select make_array(1, 2, NULL), make_array(make_array(NULL, 2), 
make_array(NULL, 3));
 ----
 [1, 2, ] [[, 2], [, 3]]
 
-# array scalar function with nulls #3
-query ??? rowsort
+# make_array scalar function with nulls #3
+query ???
 select make_array(NULL), make_array(NULL, NULL, NULL), 
make_array(make_array(NULL, NULL), make_array(NULL, NULL));
 ----
 [] [] [[], []]
 
+# make_array with columns #1

Review Comment:
   We can automatically ensure they stay in sync by using the postgres 
compatability mode if you wanted: 
https://github.com/apache/arrow-datafusion/tree/main/datafusion/core/tests/sqllogictests#running-tests-postgres-compatibility



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to