This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 10d8bcba63 Add support for ListView in unnest (#20760)
10d8bcba63 is described below
commit 10d8bcba6352e5fff582159c1a7adf022b4269be
Author: Frederic Branczyk <[email protected]>
AuthorDate: Fri Mar 13 20:52:21 2026 +0100
Add support for ListView in unnest (#20760)
## Which issue does this PR close?
Closes #20759
## What changes are included in this PR?
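Adds support for unnesting `ListView` and `LargeListView` arrays: the logical plan, the SQL planner, the physical `unnest` operator, and `ScalarValue` conversion now accept these types alongside `List`, `LargeList`, and `FixedSizeList`.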
## Are these changes tested?
Yes, via new sqllogictest (SLT) cases in `datafusion/sqllogictest/test_files/unnest.slt`.
## Are there any user-facing changes?
No, only net-new support for existing APIs.
---
datafusion/common/src/scalar/mod.rs | 16 +++++
datafusion/expr/src/expr_schema.rs | 4 +-
datafusion/expr/src/logical_plan/plan.rs | 14 +++-
datafusion/physical-plan/src/unnest.rs | 33 ++++++++-
datafusion/sql/src/expr/function.rs | 2 +
datafusion/sql/src/utils.rs | 4 +-
datafusion/sqllogictest/test_files/unnest.slt | 96 +++++++++++++++++++++++++++
7 files changed, 163 insertions(+), 6 deletions(-)
diff --git a/datafusion/common/src/scalar/mod.rs
b/datafusion/common/src/scalar/mod.rs
index d759bbedd9..95d8a8511b 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -3676,6 +3676,22 @@ impl ScalarValue {
.with_field(field)
.build_fixed_size_list_scalar(list_size)
}
+ DataType::ListView(field) => {
+ let list_array = array.as_list_view::<i32>();
+ let nested_array = list_array.value(index);
+ // Store as List scalar since ScalarValue has no ListView variant.
+ SingleRowListArrayBuilder::new(nested_array)
+ .with_field(field)
+ .build_list_scalar()
+ }
+ DataType::LargeListView(field) => {
+ let list_array = array.as_list_view::<i64>();
+ let nested_array = list_array.value(index);
+ // Store as LargeList scalar since ScalarValue has no LargeListView variant.
+ SingleRowListArrayBuilder::new(nested_array)
+ .with_field(field)
+ .build_large_list_scalar()
+ }
DataType::Date32 => typed_cast!(array, index, as_date32_array,
Date32)?,
DataType::Date64 => typed_cast!(array, index, as_date64_array,
Date64)?,
DataType::Time32(TimeUnit::Second) => {
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index 4168310002..92b78b1579 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -141,7 +141,9 @@ impl ExprSchemable for Expr {
match arg_data_type {
DataType::List(field)
| DataType::LargeList(field)
- | DataType::FixedSizeList(field, _) =>
Ok(field.data_type().clone()),
+ | DataType::FixedSizeList(field, _)
+ | DataType::ListView(field)
+ | DataType::LargeListView(field) =>
Ok(field.data_type().clone()),
DataType::Struct(_) => Ok(arg_data_type),
DataType::Null => {
not_impl_err!("unnest() does not support null yet")
diff --git a/datafusion/expr/src/logical_plan/plan.rs
b/datafusion/expr/src/logical_plan/plan.rs
index dc4daf1ab7..fe8a8dd870 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -4194,7 +4194,9 @@ impl Unnest {
}
DataType::List(_)
| DataType::FixedSizeList(_, _)
- | DataType::LargeList(_) => {
+ | DataType::LargeList(_)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_) => {
list_columns.push((
index,
ColumnUnnestList {
@@ -4269,7 +4271,11 @@ fn get_unnested_columns(
let mut qualified_columns = Vec::with_capacity(1);
match data_type {
- DataType::List(_) | DataType::FixedSizeList(_, _) |
DataType::LargeList(_) => {
+ DataType::List(_)
+ | DataType::FixedSizeList(_, _)
+ | DataType::LargeList(_)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_) => {
let data_type = get_unnested_list_datatype_recursive(data_type,
depth)?;
let new_field = Arc::new(Field::new(
col_name, data_type,
@@ -4306,7 +4312,9 @@ fn get_unnested_list_datatype_recursive(
match data_type {
DataType::List(field)
| DataType::FixedSizeList(field, _)
- | DataType::LargeList(field) => {
+ | DataType::LargeList(field)
+ | DataType::ListView(field)
+ | DataType::LargeListView(field) => {
if depth == 1 {
return Ok(field.data_type().clone());
}
diff --git a/datafusion/physical-plan/src/unnest.rs
b/datafusion/physical-plan/src/unnest.rs
index 48de79b741..8579925018 100644
--- a/datafusion/physical-plan/src/unnest.rs
+++ b/datafusion/physical-plan/src/unnest.rs
@@ -33,7 +33,8 @@ use crate::{
use arrow::array::{
Array, ArrayRef, AsArray, BooleanBufferBuilder, FixedSizeListArray,
Int64Array,
- LargeListArray, ListArray, PrimitiveArray, Scalar, StructArray,
new_null_array,
+ LargeListArray, LargeListViewArray, ListArray, ListViewArray,
PrimitiveArray, Scalar,
+ StructArray, new_null_array,
};
use arrow::compute::kernels::length::length;
use arrow::compute::kernels::zip::zip;
@@ -845,6 +846,30 @@ impl ListArrayType for FixedSizeListArray {
}
}
+impl ListArrayType for ListViewArray {
+ fn values(&self) -> &ArrayRef {
+ self.values()
+ }
+
+ fn value_offsets(&self, row: usize) -> (i64, i64) {
+ let offset = self.value_offsets()[row] as i64;
+ let size = self.value_sizes()[row] as i64;
+ (offset, offset + size)
+ }
+}
+
+impl ListArrayType for LargeListViewArray {
+ fn values(&self) -> &ArrayRef {
+ self.values()
+ }
+
+ fn value_offsets(&self, row: usize) -> (i64, i64) {
+ let offset = self.value_offsets()[row];
+ let size = self.value_sizes()[row];
+ (offset, offset + size)
+ }
+}
+
/// Unnest multiple list arrays according to the length array.
fn unnest_list_arrays(
list_arrays: &[ArrayRef],
@@ -861,6 +886,12 @@ fn unnest_list_arrays(
DataType::FixedSizeList(_, _) => {
Ok(list_array.as_fixed_size_list() as &dyn ListArrayType)
}
+ DataType::ListView(_) => {
+ Ok(list_array.as_list_view::<i32>() as &dyn ListArrayType)
+ }
+ DataType::LargeListView(_) => {
+ Ok(list_array.as_list_view::<i64>() as &dyn ListArrayType)
+ }
other => exec_err!("Invalid unnest datatype {other }"),
})
.collect::<Result<Vec<_>>>()?;
diff --git a/datafusion/sql/src/expr/function.rs
b/datafusion/sql/src/expr/function.rs
index c81575366f..3ec699ae57 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -905,6 +905,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
DataType::List(_)
| DataType::LargeList(_)
| DataType::FixedSizeList(_, _)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_)
| DataType::Struct(_) => Ok(()),
DataType::Null => {
not_impl_err!("unnest() does not support null yet")
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 16ac353d4b..1a76dd69f4 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -466,7 +466,9 @@ impl RecursiveUnnestRewriter<'_> {
}
DataType::List(_)
| DataType::FixedSizeList(_, _)
- | DataType::LargeList(_) => {
+ | DataType::LargeList(_)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_) => {
push_projection_dedupl(
self.inner_projection_exprs,
expr_in_unnest.clone().alias(placeholder_name.clone()),
diff --git a/datafusion/sqllogictest/test_files/unnest.slt
b/datafusion/sqllogictest/test_files/unnest.slt
index 73aeb6c99d..ba499679a9 100644
--- a/datafusion/sqllogictest/test_files/unnest.slt
+++ b/datafusion/sqllogictest/test_files/unnest.slt
@@ -1233,3 +1233,99 @@ physical_plan
# cleanup
statement ok
drop table t;
+
+########################################
+# Unnest ListView / LargeListView Tests #
+########################################
+
+## Basic unnest ListView in select list
+query I
+select unnest(arrow_cast([1,2,3], 'ListView(Int64)'));
+----
+1
+2
+3
+
+## Basic unnest ListView in from clause
+query I
+select * from unnest(arrow_cast([1,2,3], 'ListView(Int64)'));
+----
+1
+2
+3
+
+## Basic unnest LargeListView in select list
+query I
+select unnest(arrow_cast([1,2,3], 'LargeListView(Int64)'));
+----
+1
+2
+3
+
+## Basic unnest LargeListView in from clause
+query I
+select * from unnest(arrow_cast([1,2,3], 'LargeListView(Int64)'));
+----
+1
+2
+3
+
+## Unnest ListView with range
+query I
+select unnest(arrow_cast(range(1, 3), 'ListView(Int64)'));
+----
+1
+2
+
+## Unnest LargeListView with range
+query I
+select * from unnest(arrow_cast(range(1, 3), 'LargeListView(Int64)'));
+----
+1
+2
+
+## Multiple unnest with ListView columns from a table
+query III
+select
+ unnest(column1),
+ unnest(arrow_cast(column2, 'ListView(Int64)')),
+ unnest(arrow_cast(column4, 'LargeListView(Int64)'))
+from unnest_table where column4 is not null;
+----
+1 7 13
+2 NULL 14
+3 NULL NULL
+4 8 15
+5 9 16
+NULL 10 NULL
+NULL NULL 17
+NULL NULL 18
+
+## Unnest ListView with null elements
+query I
+select unnest(arrow_cast([1, null, 3], 'ListView(Int64)'));
+----
+1
+NULL
+3
+
+## Unnest empty ListView
+query I
+select unnest(arrow_cast([], 'ListView(Int64)'));
+----
+
+## Unnest ListView of strings
+query T
+select unnest(arrow_cast(['a','b','c'], 'ListView(Utf8)'));
+----
+a
+b
+c
+
+## Unnest LargeListView of strings
+query T
+select unnest(arrow_cast(['a','b','c'], 'LargeListView(Utf8)'));
+----
+a
+b
+c
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]