alamb commented on code in PR #8744: URL: https://github.com/apache/arrow-datafusion/pull/8744#discussion_r1446623400
########## datafusion/physical-expr/src/array_expressions.rs: ########## @@ -2560,6 +2562,101 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> { } } +pub fn array_resize(arg: &[ArrayRef]) -> Result<ArrayRef> { + if arg.len() < 2 || arg.len() > 3 { + return exec_err!("array_resize needs two or three arguments"); + } + + let new_len = as_int64_array(&arg[1])?; + let new_element = if arg.len() == 3 { + Some(arg[2].clone()) + } else { + None + }; + + match &arg[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&arg[0])?; + general_list_resize::<i32>(array, new_len, field, new_element) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&arg[0])?; + general_list_resize::<i64>(array, new_len, field, new_element) + } + array_type => exec_err!("array_resize does not support type '{array_type:?}'."), + } +} + +fn general_list_resize<O: OffsetSizeTrait>( + array: &GenericListArray<O>, + count_array: &Int64Array, + field: &FieldRef, + default_element: Option<ArrayRef>, +) -> Result<ArrayRef> { + let mut offsets = vec![O::usize_as(0)]; + let mut new_arrays = vec![]; + + let dt = array.value_type(); + let converter = RowConverter::new(vec![SortField::new(dt.clone())])?; + let default_element = if let Some(default_element) = default_element { + default_element + } else { + empty_list(&dt)? 
+ }; + let rows = converter.convert_columns(&[default_element.clone()])?; + + for (index, arr) in array.iter().enumerate() { Review Comment: Instead of `RowConverter` which will copy the data twice, you can probably use `MutableArrayData` to create the output directly ########## datafusion/physical-expr/src/array_expressions.rs: ########## @@ -2560,6 +2562,101 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> { } } +pub fn array_resize(arg: &[ArrayRef]) -> Result<ArrayRef> { + if arg.len() < 2 || arg.len() > 3 { + return exec_err!("array_resize needs two or three arguments"); + } + + let new_len = as_int64_array(&arg[1])?; + let new_element = if arg.len() == 3 { + Some(arg[2].clone()) + } else { + None + }; + + match &arg[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&arg[0])?; + general_list_resize::<i32>(array, new_len, field, new_element) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&arg[0])?; + general_list_resize::<i64>(array, new_len, field, new_element) + } + array_type => exec_err!("array_resize does not support type '{array_type:?}'."), + } +} + +fn general_list_resize<O: OffsetSizeTrait>( + array: &GenericListArray<O>, + count_array: &Int64Array, + field: &FieldRef, + default_element: Option<ArrayRef>, +) -> Result<ArrayRef> { + let mut offsets = vec![O::usize_as(0)]; + let mut new_arrays = vec![]; + + let dt = array.value_type(); + let converter = RowConverter::new(vec![SortField::new(dt.clone())])?; + let default_element = if let Some(default_element) = default_element { Review Comment: I think if you used ScalarValue here (rather than Option<ArrayRef>) you can probably avoid empty_list entirely ########## datafusion/common/src/utils.rs: ########## @@ -492,6 +496,44 @@ pub fn list_ndims(data_type: &DataType) -> u64 { } } +/// Create an new empty array based on the given data type. Review Comment: Technically this list isn't empty, is it ? 
It contains a single `Null` value.

Also, it seems like this handles all data types, not just a list. I wonder if the same thing could be done by making a null scalar, like

```rust
let null_scalar = ScalarValue::try_from(&data_type)?;
null_scalar.to_array_of_size(1)
```

🤔

-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org