alamb commented on code in PR #12329: URL: https://github.com/apache/datafusion/pull/12329#discussion_r1754784956
########## datafusion/functions-nested/src/extract.rs: ########## @@ -687,3 +695,115 @@ where ); general_array_slice::<O>(array, &from_array, &to_array, None) } + +#[derive(Debug)] +pub(super) struct ArrayAnyValue { + signature: Signature, + aliases: Vec<String>, +} + +impl ArrayAnyValue { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("list_any_value")], + } + } +} + +impl ScalarUDFImpl for ArrayAnyValue { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_any_value" + } + fn signature(&self) -> &Signature { + &self.signature + } + fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { + match &arg_types[0] { + List(field) + | LargeList(field) + | FixedSizeList(field, _) => Ok(field.data_type().clone()), + _ => plan_err!( + "array_any_value can only accept List, LargeList or FixedSizeList as the argument" + ), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { + make_scalar_function(array_any_value_inner)(args) + } + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +fn array_any_value_inner(args: &[ArrayRef]) -> Result<ArrayRef> { + if args.len() != 1 { + return exec_err!("array_any_value expects one argument"); + } + + match &args[0].data_type() { + List(_) => { + let array = as_list_array(&args[0])?; + general_array_any_value::<i32>(array) + } + LargeList(_) => { + let array = as_large_list_array(&args[0])?; + general_array_any_value::<i64>(array) + } + data_type => exec_err!("array_any_value does not support type: {:?}", data_type), + } +} + +fn general_array_any_value<O: OffsetSizeTrait>( + array: &GenericListArray<O>, +) -> Result<ArrayRef> +where + i64: TryInto<O>, +{ + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(array.len()); + + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], true, capacity); + + for (row_index, 
offset_window) in array.offsets().windows(2).enumerate() { + let start = offset_window[0]; + let end = offset_window[1]; + let len = end - start; + + // array is null + if len == O::usize_as(0) { + mutable.extend_nulls(1); + continue; + } + + let row_value = array.value(row_index); + match row_value.nulls() { + Some(row_nulls_buffer) => { + // nulls are present in the array so try to take the first valid element + if let Some(first_non_null_index) = Review Comment: You are probably right -- I think we should add a test case to cover this and we will be all good (e.g. pass in an array like `NULL, NULL, 1, 2`) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org