This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git

The following commit(s) were added to refs/heads/main by this push:
     new 3764fe760a Fix array_concat with NULL arrays (#16348)
3764fe760a is described below

commit 3764fe760ad9b1e2cbff0aabbd46dd1de59a98a8
Author: alexanderbianchi <75697973+alexanderbian...@users.noreply.github.com>
AuthorDate: Thu Jun 12 08:24:48 2025 -0400

    Fix array_concat with NULL arrays (#16348)

    * Fix array_concat with NULL arrays to avoid Arrow concat error

    - Fix array_concat_inner to properly handle NULL list arrays by checking null_count
    - Add logic to create properly typed empty arrays for all-null inputs
    - Support NULL handling for List, LargeList, and FixedSizeList types
    - Add comprehensive test coverage for NULL array scenarios

    Fixes issue where 'select array_concat(NULL::integer[])' would throw 'Arrow error: Compute error: concat requires input of at least one array'

    * Simplify null count check and null array generation
---
 datafusion/functions-nested/src/concat.rs    | 20 ++++++++++++----
 datafusion/sqllogictest/test_files/array.slt | 36 ++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs
index dd8784d36c..e8b7fc27b4 100644
--- a/datafusion/functions-nested/src/concat.rs
+++ b/datafusion/functions-nested/src/concat.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
 use crate::make_array::make_array_inner;
 use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function};
 use arrow::array::{
-    Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullArray,
+    Array, ArrayData, ArrayRef, Capacities, GenericListArray, MutableArrayData,
     NullBufferBuilder, OffsetSizeTrait,
 };
 use arrow::buffer::OffsetBuffer;
@@ -42,6 +42,7 @@ use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
 use datafusion_macros::user_doc;
+use itertools::Itertools;
 
 make_udf_expr_and_func!(
     ArrayAppend,
@@ -364,12 +365,23 @@ pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
             DataType::LargeList(_) => large_list = true,
             _ => (),
         }
-
-        all_null = false
+        if arg.null_count() < arg.len() {
+            all_null = false;
+        }
     }
 
     if all_null {
-        Ok(Arc::new(NullArray::new(args[0].len())))
+        // Return a null array with the same type as the first non-null-type argument
+        let return_type = args
+            .iter()
+            .map(|arg| arg.data_type())
+            .find_or_first(|d| !d.is_null())
+            .unwrap(); // Safe because args is non-empty
+
+        Ok(arrow::array::make_array(ArrayData::new_null(
+            return_type,
+            args[0].len(),
+        )))
     } else if large_list {
         concat_internal::<i64>(args)
     } else {
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 3f0233325e..0139daecca 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -3070,6 +3070,42 @@ select array_concat([]);
 ----
 []
 
+# test with NULL array
+query ?
+select array_concat(NULL::integer[]);
+----
+NULL
+
+# test with multiple NULL arrays
+query ?
+select array_concat(NULL::integer[], NULL::integer[]);
+----
+NULL
+
+# test with NULL LargeList
+query ?
+select array_concat(arrow_cast(NULL::string[], 'LargeList(Utf8)'));
+----
+NULL
+
+# test with NULL FixedSizeList
+query ?
+select array_concat(arrow_cast(NULL::string[], 'FixedSizeList(2, Utf8)'));
+----
+NULL
+
+# test with mix of NULL and empty arrays
+query ?
+select array_concat(NULL::integer[], []);
+----
+[]
+
+# test with mix of NULL and non-empty arrays
+query ?
+select array_concat(NULL::integer[], [1, 2, 3]);
+----
+[1, 2, 3]
+
 # Concatenating strings arrays
 query ?
 select array_concat(

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org