alamb commented on code in PR #8744:
URL: https://github.com/apache/arrow-datafusion/pull/8744#discussion_r1446623400


##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -2560,6 +2562,101 @@ pub fn array_distinct(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
+pub fn array_resize(arg: &[ArrayRef]) -> Result<ArrayRef> {
+    if arg.len() < 2 || arg.len() > 3 {
+        return exec_err!("array_resize needs two or three arguments");
+    }
+
+    let new_len = as_int64_array(&arg[1])?;
+    let new_element = if arg.len() == 3 {
+        Some(arg[2].clone())
+    } else {
+        None
+    };
+
+    match &arg[0].data_type() {
+        DataType::List(field) => {
+            let array = as_list_array(&arg[0])?;
+            general_list_resize::<i32>(array, new_len, field, new_element)
+        }
+        DataType::LargeList(field) => {
+            let array = as_large_list_array(&arg[0])?;
+            general_list_resize::<i64>(array, new_len, field, new_element)
+        }
+        array_type => exec_err!("array_resize does not support type 
'{array_type:?}'."),
+    }
+}
+
+fn general_list_resize<O: OffsetSizeTrait>(
+    array: &GenericListArray<O>,
+    count_array: &Int64Array,
+    field: &FieldRef,
+    default_element: Option<ArrayRef>,
+) -> Result<ArrayRef> {
+    let mut offsets = vec![O::usize_as(0)];
+    let mut new_arrays = vec![];
+
+    let dt = array.value_type();
+    let converter = RowConverter::new(vec![SortField::new(dt.clone())])?;
+    let default_element = if let Some(default_element) = default_element {
+        default_element
+    } else {
+        empty_list(&dt)?
+    };
+    let rows = converter.convert_columns(&[default_element.clone()])?;
+
+    for (index, arr) in array.iter().enumerate() {

Review Comment:
   Instead of `RowConverter`, which will copy the data twice, you can probably 
use `MutableArrayData` to create the output directly.



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -2560,6 +2562,101 @@ pub fn array_distinct(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
+pub fn array_resize(arg: &[ArrayRef]) -> Result<ArrayRef> {
+    if arg.len() < 2 || arg.len() > 3 {
+        return exec_err!("array_resize needs two or three arguments");
+    }
+
+    let new_len = as_int64_array(&arg[1])?;
+    let new_element = if arg.len() == 3 {
+        Some(arg[2].clone())
+    } else {
+        None
+    };
+
+    match &arg[0].data_type() {
+        DataType::List(field) => {
+            let array = as_list_array(&arg[0])?;
+            general_list_resize::<i32>(array, new_len, field, new_element)
+        }
+        DataType::LargeList(field) => {
+            let array = as_large_list_array(&arg[0])?;
+            general_list_resize::<i64>(array, new_len, field, new_element)
+        }
+        array_type => exec_err!("array_resize does not support type 
'{array_type:?}'."),
+    }
+}
+
+fn general_list_resize<O: OffsetSizeTrait>(
+    array: &GenericListArray<O>,
+    count_array: &Int64Array,
+    field: &FieldRef,
+    default_element: Option<ArrayRef>,
+) -> Result<ArrayRef> {
+    let mut offsets = vec![O::usize_as(0)];
+    let mut new_arrays = vec![];
+
+    let dt = array.value_type();
+    let converter = RowConverter::new(vec![SortField::new(dt.clone())])?;
+    let default_element = if let Some(default_element) = default_element {

Review Comment:
   I think if you used `ScalarValue` here (rather than `Option<ArrayRef>`), you 
could probably avoid `empty_list` entirely.



##########
datafusion/common/src/utils.rs:
##########
@@ -492,6 +496,44 @@ pub fn list_ndims(data_type: &DataType) -> u64 {
     }
 }
 
+/// Create an new empty array based on the given data type.

Review Comment:
   Technically this list isn't empty, is it? It contains a single `Null` value.
   
   Also, it seems like this handles all data types, not just lists. I wonder 
if the same thing could be done by making a null scalar, like this:
   
   ```rust
   let null_scalar = ScalarValue::try_from(&data_type)?;
   null_scalar.to_array_of_size(1)
   ```
   🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to