This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0430664db2 add `try_new_with_length` constructor to `FixedSizeList`
(#8624)
0430664db2 is described below
commit 0430664db2f2b295bf6e87177a2a88279f1da6aa
Author: Connor Tsui <[email protected]>
AuthorDate: Wed Nov 5 12:57:42 2025 -0500
add `try_new_with_length` constructor to `FixedSizeList` (#8624)
Closes https://github.com/apache/arrow-rs/issues/8623
As a first step for fixing #8623, this PR introduces a
`try_new_with_length` constructor for `FixedSizeListArray` that takes in
a `len` parameter for the specific case of non-nullable, degenerate
arrays.
To be honest, I think updating the existing constructors makes more
sense (breaking change), but this is an easier first step because we can
just make `try_new_with_length` the new `try_new` from here.
I also fixed 2 cases of an existing unit test that were wrong, as well
as added an extra test for the degenerate case.
Edit: I realized that the constructors also do not check if the list
size correctly divides the input `values.len()` in `try_new`, so added a
check for that plus more tests.
Signed-off-by: Connor Tsui <[email protected]>
---
arrow-array/src/array/fixed_size_list_array.rs | 168 +++++++++++++++++++++----
1 file changed, 143 insertions(+), 25 deletions(-)
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index 379d8fe180..ddaaac80dc 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -125,7 +125,15 @@ pub struct FixedSizeListArray {
}
impl FixedSizeListArray {
- /// Create a new [`FixedSizeListArray`] with `size` element size,
panicking on failure
+ /// Create a new [`FixedSizeListArray`] with `size` element size,
panicking on failure.
+ ///
+ /// Note that if `size == 0` and `nulls` is `None` (a degenerate,
non-nullable
+ /// `FixedSizeListArray`), this function will set the length of the array
to 0.
+ ///
+ /// If you would like to have a degenerate, non-nullable
`FixedSizeListArray` with arbitrary
+ /// length, use the [`try_new_with_length()`] constructor.
+ ///
+ /// [`try_new_with_length()`]: Self::try_new_with_length
///
/// # Panics
///
@@ -134,12 +142,20 @@ impl FixedSizeListArray {
Self::try_new(field, size, values, nulls).unwrap()
}
- /// Create a new [`FixedSizeListArray`] from the provided parts, returning
an error on failure
+ /// Create a new [`FixedSizeListArray`] from the provided parts, returning
an error on failure.
+ ///
+ /// Note that if `size == 0` and `nulls` is `None` (a degenerate,
non-nullable
+ /// `FixedSizeListArray`), this function will set the length of the array
to 0.
+ ///
+ /// If you would like to have a degenerate, non-nullable
`FixedSizeListArray` with arbitrary
+ /// length, use the [`try_new_with_length()`] constructor.
+ ///
+ /// [`try_new_with_length()`]: Self::try_new_with_length
///
/// # Errors
///
/// * `size < 0`
- /// * `values.len() / size != nulls.len()`
+ /// * `values.len() != nulls.len() * size` if `nulls` is `Some`
/// * `values.data_type() != field.data_type()`
/// * `!field.is_nullable() &&
!nulls.expand(size).contains(values.logical_nulls())`
pub fn try_new(
@@ -152,22 +168,88 @@ impl FixedSizeListArray {
ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {size}"))
})?;
- let len = match s {
- 0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
- _ => {
- let len = values.len() / s.max(1);
- if let Some(n) = nulls.as_ref() {
- if n.len() != len {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Incorrect length of null buffer for
FixedSizeListArray, expected {} got {}",
- len,
- n.len(),
- )));
- }
+ if s == 0 {
+ // Note that for degenerate (`size == 0`) and non-nullable
`FixedSizeList`s, we will set
+ // the length to 0 (`_or_default`).
+ let len = nulls.as_ref().map(|x| x.len()).unwrap_or_default();
+
+ Self::try_new_with_length(field, size, values, nulls, len)
+ } else {
+ if values.len() % s != 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Incorrect length of values buffer for FixedSizeListArray,
\
+ expected a multiple of {s} got {}",
+ values.len(),
+ )));
+ }
+
+ let len = values.len() / s;
+
+ // Check that the null buffer length is correct (if it exists).
+ if let Some(null_buffer) = &nulls {
+ if s * null_buffer.len() != values.len() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Incorrect length of values buffer for
FixedSizeListArray, \
+ expected {} got {}",
+ s * null_buffer.len(),
+ values.len(),
+ )));
}
- len
}
- };
+
+ Self::try_new_with_length(field, size, values, nulls, len)
+ }
+ }
+
+ /// Create a new [`FixedSizeListArray`] from the provided parts, returning
an error on failure.
+ ///
+ /// This method exists to allow the construction of arbitrary length
degenerate (`size == 0`)
+ /// and non-nullable `FixedSizeListArray`s. If you want a nullable
`FixedSizeListArray`, then
+ /// you can use [`try_new()`] instead.
+ ///
+ /// [`try_new()`]: Self::try_new
+ ///
+ /// # Errors
+ ///
+ /// * `size < 0`
+ /// * `nulls.len() != len` if `nulls` is `Some`
+ /// * `values.len() != len * size`
+ /// * `values.data_type() != field.data_type()`
+ /// * `!field.is_nullable() &&
!nulls.expand(size).contains(values.logical_nulls())`
+ pub fn try_new_with_length(
+ field: FieldRef,
+ size: i32,
+ values: ArrayRef,
+ nulls: Option<NullBuffer>,
+ len: usize,
+ ) -> Result<Self, ArrowError> {
+ let s = size.to_usize().ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {size}"))
+ })?;
+
+ if let Some(null_buffer) = &nulls {
+ if null_buffer.len() != len {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid null buffer for FixedSizeListArray, expected
{len} found {}",
+ null_buffer.len()
+ )));
+ }
+ }
+
+ if s == 0 && !values.is_empty() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "An degenerate FixedSizeListArray should have no underlying
values, found {} values",
+ values.len()
+ )));
+ }
+
+ if values.len() != len * s {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Incorrect length of values buffer for FixedSizeListArray,
expected {} got {}",
+ len * s,
+ values.len(),
+ )));
+ }
if field.data_type() != values.data_type() {
return Err(ArrowError::InvalidArgumentError(format!(
@@ -673,8 +755,23 @@ mod tests {
let list = FixedSizeListArray::new(field.clone(), 2, values.clone(),
Some(nulls));
assert_eq!(list.len(), 3);
- let list = FixedSizeListArray::new(field.clone(), 4, values.clone(),
None);
- assert_eq!(list.len(), 1);
+ let list = FixedSizeListArray::new(field.clone(), 3, values.clone(),
None);
+ assert_eq!(list.len(), 2);
+
+ let err = FixedSizeListArray::try_new(field.clone(), 4,
values.clone(), None).unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: Incorrect length of values buffer for
FixedSizeListArray, \
+ expected a multiple of 4 got 6",
+ );
+
+ let err =
+ FixedSizeListArray::try_new_with_length(field.clone(), 4,
values.clone(), None, 1)
+ .unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: Incorrect length of values buffer for
FixedSizeListArray, expected 4 got 6"
+ );
let err = FixedSizeListArray::try_new(field.clone(), -1,
values.clone(), None).unwrap_err();
assert_eq!(
@@ -682,14 +779,11 @@ mod tests {
"Invalid argument error: Size cannot be negative, got -1"
);
- let list = FixedSizeListArray::new(field.clone(), 0, values.clone(),
None);
- assert_eq!(list.len(), 0);
-
let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(),
Some(nulls)).unwrap_err();
assert_eq!(
err.to_string(),
- "Invalid argument error: Incorrect length of null buffer for
FixedSizeListArray, expected 3 got 2"
+ "Invalid argument error: Incorrect length of values buffer for
FixedSizeListArray, expected 4 got 6"
);
let field = Arc::new(Field::new_list_field(DataType::Int32, false));
@@ -712,11 +806,35 @@ mod tests {
}
#[test]
- fn empty_fixed_size_list() {
+ fn degenerate_fixed_size_list() {
let field = Arc::new(Field::new_list_field(DataType::Int32, true));
let nulls = NullBuffer::new_null(2);
let values = new_empty_array(&DataType::Int32);
- let list = FixedSizeListArray::new(field.clone(), 0, values,
Some(nulls));
+ let list = FixedSizeListArray::new(field.clone(), 0, values.clone(),
Some(nulls.clone()));
assert_eq!(list.len(), 2);
+
+ // Test invalid null buffer length.
+ let err = FixedSizeListArray::try_new_with_length(
+ field.clone(),
+ 0,
+ values.clone(),
+ Some(nulls),
+ 5,
+ )
+ .unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: Invalid null buffer for
FixedSizeListArray, expected 5 found 2"
+ );
+
+ // Test non-empty values for degenerate list.
+ let non_empty_values = Arc::new(Int32Array::from(vec![1, 2, 3]));
+ let err =
+ FixedSizeListArray::try_new_with_length(field.clone(), 0,
non_empty_values, None, 3)
+ .unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: An degenerate FixedSizeListArray should
have no underlying values, found 3 values"
+ );
}
}