alamb commented on code in PR #6936:
URL: https://github.com/apache/arrow-datafusion/pull/6936#discussion_r1279718073


##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -363,6 +363,177 @@ pub fn make_array(arrays: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
+fn return_empty(return_null: bool, data_type: DataType) -> Arc<dyn Array> {
+    if return_null {
+        new_null_array(&data_type, 1)
+    } else {
+        new_empty_array(&data_type)
+    }
+}
+
+macro_rules! list_slice {
+    ($ARRAY:expr, $I:expr, $J:expr, $RETURN_ELEMENT:expr, $ARRAY_TYPE:ident) 
=> {{
+        let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
+        if $I == 0 && $J == 0 || $ARRAY.is_empty() {
+            return return_empty($RETURN_ELEMENT, $ARRAY.data_type().clone());
+        }
+
+        let i = if $I < 0 {
+            if $I.abs() as usize > array.len() {
+                return return_empty(true, $ARRAY.data_type().clone());
+            }
+
+            (array.len() as i64 + $I + 1) as usize
+        } else {
+            if $I == 0 {
+                1
+            } else {
+                $I as usize
+            }
+        };
+        let j = if $J < 0 {
+            if $J.abs() as usize > array.len() {
+                return return_empty(true, $ARRAY.data_type().clone());
+            }
+
+            if $RETURN_ELEMENT {
+                (array.len() as i64 + $J + 1) as usize
+            } else {
+                (array.len() as i64 + $J) as usize
+            }
+        } else {
+            if $J == 0 {
+                1
+            } else {
+                if $J as usize > array.len() {
+                    array.len()
+                } else {
+                    $J as usize
+                }
+            }
+        };
+
+        if i > j || i as usize > $ARRAY.len() {
+            return_empty($RETURN_ELEMENT, $ARRAY.data_type().clone())
+        } else {
+            Arc::new(array.slice((i - 1), (j + 1 - i)))
+        }
+    }};
+}
+
+macro_rules! slice {

Review Comment:
   I didn't fully understand this implementation -- it seem like picking a 
single value from a list array can be implemented by calculating the 
appropriate offsets and then calling 
[take](https://docs.rs/arrow/latest/arrow/compute/kernels/take/fn.take.html)
   
   For example, given A ListArray like this
   
   
   ```
                                  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
                                                          ┌ ─ ─ ─ ─ ─ ─ ┐    │
    ┌─────────────┐  ┌───────┐    │     ┌───┐   ┌───┐       ┌───┐ ┌───┐       
    │   [A,B,C]   │  │ (0,3) │          │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │ [] (empty)  │  │ (3,3) │          │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │    NULL     │  │ (3,4) │          │ 0 │   │ 4 │     │ │ 1 │ │ C │ │ 2  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │     [D]     │  │  4,5  │          │ 1 │   │ 5 │     │ │ 1 │ │ ? │ │ 3  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │  [NULL, F]  │  │  5,7  │          │ 1 │   │ 5 │     │ │ 0 │ │ D │ │ 4  │
    └─────────────┘  └───────┘    │     └───┘   ├───┤       ├───┤ ├───┤       
                                                │ 7 │     │ │ 1 │ │ ? │ │ 5  │
                                  │  Validity   └───┘       ├───┤ ├───┤       
       Logical       Logical         (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
        Values       Offsets      │                         └───┘ └───┘       
                                                          │    Values   │    │
                   (offsets[i],   │   ListArray               (Array)         
                  offsets[i+1])                           └ ─ ─ ─ ─ ─ ─ ┘    │
                                  └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
   ```
   
   Computing `col[1]` (aka take the second element) could be computed by adding 
1 to the first logical offset, checking if that is still in the array
   
   So in this example, the take indicies would be
   ```
   [ 
     Some(1),  (add 1 to start of (0, 3) is 1
     None , //   (adding 1 start of (3,3) is 4 which is outside,
     None, // input was null,
     None,  // adding 1 start of (4,5 ) is 5 which is outside
     Some(6), // adding 1 to start of (5,7) is 6
   ]
   ```
   
   This calculation does not depend on datatype of the elements and would be 
fully general
   
   A similar calculation could be done for slice though in that case you would 
have to build up the offsets of the new list array as well as the indices of 
the take of the values. 
   
   In general I think many of these array calculations and manipulations can be 
done in terms of the take kernel



##########
datafusion/expr/src/field_util.rs:
##########
@@ -18,28 +18,31 @@
 //! Utility functions for complex field access
 
 use arrow::datatypes::{DataType, Field};
-use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_common::{DataFusionError, Result};
 
 /// Returns the field access indexed by `key` from a [`DataType::List`] or 
[`DataType::Struct`]
 /// # Error
 /// Errors if
-/// * the `data_type` is not a Struct or,
+/// * the `data_type` is not a Struct or a List,

Review Comment:
   I found the new documentation in 
https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.GetIndexedField.html
 to be sufficient. Thank you
   
   



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -2550,69 +3060,6 @@ mod tests {
         assert_eq!("1-*-3-*-*-6-7-*", result.value(0));
     }
 
-    #[test]

Review Comment:
   why was this test removed?



##########
datafusion/core/tests/user_defined/user_defined_aggregates.rs:
##########
@@ -152,21 +152,6 @@ async fn test_udaf_returning_struct() {
     assert_batches_eq!(expected, &execute(&ctx, sql).await.unwrap());
 }
 
-/// Demonstrate extracting the fields from a structure using a subquery

Review Comment:
   Why was this test removed? 
   
   (this is an important usecase for us in IOx where we have a UDF that returns 
a struct that is accessed by name.



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -363,6 +363,177 @@ pub fn make_array(arrays: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
+fn return_empty(return_null: bool, data_type: DataType) -> Arc<dyn Array> {
+    if return_null {
+        new_null_array(&data_type, 1)
+    } else {
+        new_empty_array(&data_type)
+    }
+}
+
+macro_rules! list_slice {
+    ($ARRAY:expr, $I:expr, $J:expr, $RETURN_ELEMENT:expr, $ARRAY_TYPE:ident) 
=> {{
+        let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
+        if $I == 0 && $J == 0 || $ARRAY.is_empty() {
+            return return_empty($RETURN_ELEMENT, $ARRAY.data_type().clone());
+        }
+
+        let i = if $I < 0 {
+            if $I.abs() as usize > array.len() {
+                return return_empty(true, $ARRAY.data_type().clone());
+            }
+
+            (array.len() as i64 + $I + 1) as usize
+        } else {
+            if $I == 0 {
+                1
+            } else {
+                $I as usize
+            }
+        };
+        let j = if $J < 0 {
+            if $J.abs() as usize > array.len() {
+                return return_empty(true, $ARRAY.data_type().clone());
+            }
+
+            if $RETURN_ELEMENT {
+                (array.len() as i64 + $J + 1) as usize
+            } else {
+                (array.len() as i64 + $J) as usize
+            }
+        } else {
+            if $J == 0 {
+                1
+            } else {
+                if $J as usize > array.len() {
+                    array.len()
+                } else {
+                    $J as usize
+                }
+            }
+        };
+
+        if i > j || i as usize > $ARRAY.len() {
+            return_empty($RETURN_ELEMENT, $ARRAY.data_type().clone())
+        } else {
+            Arc::new(array.slice((i - 1), (j + 1 - i)))
+        }
+    }};
+}
+
+macro_rules! slice {

Review Comment:
   I didn't fully understand this implementation -- it seem like picking a 
single value from a list array can be implemented by calculating the 
appropriate offsets and then calling 
[take](https://docs.rs/arrow/latest/arrow/compute/kernels/take/fn.take.html)
   
   For example, given A ListArray like this
   
   
   ```
                                  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
                                                          ┌ ─ ─ ─ ─ ─ ─ ┐    │
    ┌─────────────┐  ┌───────┐    │     ┌───┐   ┌───┐       ┌───┐ ┌───┐       
    │   [A,B,C]   │  │ (0,3) │          │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │ [] (empty)  │  │ (3,3) │          │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │    NULL     │  │ (3,4) │          │ 0 │   │ 4 │     │ │ 1 │ │ C │ │ 2  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │     [D]     │  │  4,5  │          │ 1 │   │ 5 │     │ │ 1 │ │ ? │ │ 3  │
    ├─────────────┤  ├───────┤    │     ├───┤   ├───┤       ├───┤ ├───┤       
    │  [NULL, F]  │  │  5,7  │          │ 1 │   │ 5 │     │ │ 0 │ │ D │ │ 4  │
    └─────────────┘  └───────┘    │     └───┘   ├───┤       ├───┤ ├───┤       
                                                │ 7 │     │ │ 1 │ │ ? │ │ 5  │
                                  │  Validity   └───┘       ├───┤ ├───┤       
       Logical       Logical         (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
        Values       Offsets      │                         └───┘ └───┘       
                                                          │    Values   │    │
                   (offsets[i],   │   ListArray               (Array)         
                  offsets[i+1])                           └ ─ ─ ─ ─ ─ ─ ┘    │
                                  └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
   ```
   
   Computing `col[1]` (aka take the second element) could be computed by adding 
1 to the first logical offset, checking if that is still in the array
   
   So in this example, the take indicies would be
   ```
   [ 
     Some(1),  (add 1 to start of (0, 3) is 1
     None , //   (adding 1 start of (3,3) is 4 which is outside,
     None, // input was null,
     None,  // adding 1 start of (4,5 ) is 5 which is outside
     Some(6), // adding 1 to start of (5,7) is 6
   ]
   ```
   
   This calculation does not depend on datatype of the elements and would be 
fully general
   
   A similar calculation could be done for slice though in that case you would 
have to build up the offsets of the new list array as well as the indices of 
the take of the values. 
   
   In general I think many of these array calculations and manipulations can be 
done in terms of the take kernel



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to