This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new c929a1cd13 ArraySort: support structs (#15527) c929a1cd13 is described below commit c929a1cd133590e4944bc2c7900611220450335a Author: cht42 <42912042+ch...@users.noreply.github.com> AuthorDate: Thu Apr 3 00:14:55 2025 +0400 ArraySort: support structs (#15527) * ArraySort: support structs * fix * fix * fix * Update datafusion/functions-nested/src/sort.rs --------- Co-authored-by: Andrew Lamb <and...@nerdnetworks.org> --- datafusion/functions-nested/src/sort.rs | 20 ++++++++++++++++++-- datafusion/sqllogictest/test_files/array.slt | 5 +++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index 1db245fe52..85737ef135 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -20,6 +20,7 @@ use crate::utils::make_scalar_function; use arrow::array::{new_null_array, Array, ArrayRef, ListArray, NullBufferBuilder}; use arrow::buffer::OffsetBuffer; +use arrow::compute::SortColumn; use arrow::datatypes::DataType::{FixedSizeList, LargeList, List}; use arrow::datatypes::{DataType, Field}; use arrow::{compute, compute::SortOptions}; @@ -207,9 +208,24 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> { valid.append_null(); } else { let arr_ref = list_array.value(i); - let arr_ref = arr_ref.as_ref(); - let sorted_array = compute::sort(arr_ref, sort_option)?; + // arrow sort kernel does not support Structs, so use + // lexsort_to_indices instead: + // https://github.com/apache/arrow-rs/issues/6911#issuecomment-2562928843 + let sorted_array = match arr_ref.data_type() { + DataType::Struct(_) => { + let sort_columns: Vec<SortColumn> = vec![SortColumn { + values: Arc::clone(&arr_ref), + options: sort_option, + }]; + let indices = compute::lexsort_to_indices(&sort_columns, None)?; + compute::take(arr_ref.as_ref(), &indices, None)? 
+ } + _ => { + let arr_ref = arr_ref.as_ref(); + compute::sort(arr_ref, sort_option)? + } + }; array_lengths.push(sorted_array.len()); arrays.push(sorted_array); valid.append_non_null(); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index cb56686b64..f9bbcedff5 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2396,6 +2396,11 @@ NULL NULL NULL NULL NULL NULL +query ? +select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)]) +---- +[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}] + ## test with argument of incorrect types query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org