This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 755616fd36d Use FixedSizeListArray::new in FixedSizeListBuilder (#5612)
755616fd36d is described below
commit 755616fd36d710390758742c22a523c1ad8ef778
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Apr 9 12:33:18 2024 +0100
Use FixedSizeListArray::new in FixedSizeListBuilder (#5612)
* Use FixedSizeListArray::new in FixedSizeListBuilder
* Fix handling of entirely null zero-sized list array (#5614)
* Fix
---
arrow-array/src/array/fixed_size_list_array.rs | 37 +++++--
arrow-array/src/builder/fixed_size_list_builder.rs | 121 +++++----------------
2 files changed, 52 insertions(+), 106 deletions(-)
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index f8f01516e3d..1d07daf4cc7 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -152,16 +152,22 @@ impl FixedSizeListArray {
ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {}", size))
})?;
- let len = values.len() / s.max(1);
- if let Some(n) = nulls.as_ref() {
- if n.len() != len {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Incorrect length of null buffer for FixedSizeListArray,
expected {} got {}",
- len,
- n.len(),
- )));
+ let len = match s {
+ 0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
+ _ => {
+ let len = values.len() / s.max(1);
+ if let Some(n) = nulls.as_ref() {
+ if n.len() != len {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Incorrect length of null buffer for
FixedSizeListArray, expected {} got {}",
+ len,
+ n.len(),
+ )));
+ }
+ }
+ len
}
- }
+ };
if field.data_type() != values.data_type() {
return Err(ArrowError::InvalidArgumentError(format!(
@@ -460,7 +466,7 @@ mod tests {
use crate::cast::AsArray;
use crate::types::Int32Type;
- use crate::Int32Array;
+ use crate::{new_empty_array, Int32Array};
use super::*;
@@ -656,7 +662,7 @@ mod tests {
);
let list = FixedSizeListArray::new(field.clone(), 0, values.clone(),
None);
- assert_eq!(list.len(), 6);
+ assert_eq!(list.len(), 0);
let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(),
Some(nulls)).unwrap_err();
@@ -674,4 +680,13 @@ mod tests {
let err = FixedSizeListArray::try_new(field, 2, values,
None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error:
FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
}
+
+ #[test]
+ fn empty_fixed_size_list() {
+ let field = Arc::new(Field::new("item", DataType::Int32, true));
+ let nulls = NullBuffer::new_null(2);
+ let values = new_empty_array(&DataType::Int32);
+ let list = FixedSizeListArray::new(field.clone(), 0, values,
Some(nulls));
+ assert_eq!(list.len(), 2);
+ }
}
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs
b/arrow-array/src/builder/fixed_size_list_builder.rs
index 3d79a763e9d..f65947bfff3 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -18,8 +18,7 @@
use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
-use arrow_data::ArrayData;
-use arrow_schema::{DataType, Field, FieldRef};
+use arrow_schema::{Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
@@ -94,9 +93,9 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
}
}
- /// Override the field passed to [`ArrayData::builder`]
+ /// Override the field passed to [`FixedSizeListArray::new`]
///
- /// By default a nullable field is created with the name `item`
+ /// By default, a nullable field is created with the name `item`
///
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
/// field's data type does not match that of `T`
@@ -169,116 +168,52 @@ where
/// Builds the [`FixedSizeListBuilder`] and reset this builder.
pub fn finish(&mut self) -> FixedSizeListArray {
let len = self.len();
- let values_arr = self.values_builder.finish();
- let values_data = values_arr.to_data();
+ let values = self.values_builder.finish();
+ let nulls = self.null_buffer_builder.finish();
assert_eq!(
- values_data.len(), len * self.list_len as usize,
+ values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value
length ({}) and the array length ({}).",
- values_data.len(),
+ values.len(),
self.list_len,
len,
);
- let nulls = self.null_buffer_builder.finish();
+ let field = self
+ .field
+ .clone()
+ .unwrap_or_else(|| Arc::new(Field::new("item",
values.data_type().clone(), true)));
- let field = match &self.field {
- Some(f) => {
- let size = self.value_length();
- assert_eq!(
- f.data_type(),
- values_data.data_type(),
- "DataType of field ({}) should be the same as the
values_builder DataType ({})",
- f.data_type(),
- values_data.data_type()
- );
-
- if let Some(a) = values_arr.logical_nulls() {
- let nulls_valid = f.is_nullable()
- || nulls
- .as_ref()
- .map(|n| n.expand(size as _).contains(&a))
- .unwrap_or_default();
-
- assert!(
- nulls_valid,
- "Found unmasked nulls for non-nullable
FixedSizeListBuilder field {:?}",
- f.name()
- );
- }
- f.clone()
- }
- None => Arc::new(Field::new("item",
values_data.data_type().clone(), true)),
- };
-
- let array_data = ArrayData::builder(DataType::FixedSizeList(field,
self.list_len))
- .len(len)
- .add_child_data(values_data)
- .nulls(nulls);
-
- let array_data = unsafe { array_data.build_unchecked() };
-
- FixedSizeListArray::from(array_data)
+ FixedSizeListArray::new(field, self.list_len, values, nulls)
}
/// Builds the [`FixedSizeListBuilder`] without resetting the builder.
pub fn finish_cloned(&self) -> FixedSizeListArray {
let len = self.len();
- let values_arr = self.values_builder.finish_cloned();
- let values_data = values_arr.to_data();
+ let values = self.values_builder.finish_cloned();
+ let nulls = self.null_buffer_builder.finish_cloned();
assert_eq!(
- values_data.len(), len * self.list_len as usize,
+ values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value
length ({}) and the array length ({}).",
- values_data.len(),
+ values.len(),
self.list_len,
len,
);
- let nulls = self.null_buffer_builder.finish_cloned();
+ let field = self
+ .field
+ .clone()
+ .unwrap_or_else(|| Arc::new(Field::new("item",
values.data_type().clone(), true)));
- let field = match &self.field {
- Some(f) => {
- let size = self.value_length();
- assert_eq!(
- f.data_type(),
- values_data.data_type(),
- "DataType of field ({}) should be the same as the
values_builder DataType ({})",
- f.data_type(),
- values_data.data_type()
- );
- if let Some(a) = values_arr.logical_nulls() {
- let nulls_valid = f.is_nullable()
- || nulls
- .as_ref()
- .map(|n| n.expand(size as _).contains(&a))
- .unwrap_or_default();
-
- assert!(
- nulls_valid,
- "Found unmasked nulls for non-nullable
FixedSizeListBuilder field {:?}",
- f.name()
- );
- }
- f.clone()
- }
- None => Arc::new(Field::new("item",
values_data.data_type().clone(), true)),
- };
-
- let array_data = ArrayData::builder(DataType::FixedSizeList(field,
self.list_len))
- .len(len)
- .add_child_data(values_data)
- .nulls(nulls);
-
- let array_data = unsafe { array_data.build_unchecked() };
-
- FixedSizeListArray::from(array_data)
+ FixedSizeListArray::new(field, self.list_len, values, nulls)
}
}
#[cfg(test)]
mod tests {
use super::*;
+ use arrow_schema::DataType;
use crate::builder::Int32Builder;
use crate::Array;
@@ -368,7 +303,7 @@ mod tests {
}
#[test]
- #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListBuilder field")]
+ #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListArray")]
fn test_fixed_size_list_array_builder_with_field_null_panic() {
let builder = make_list_builder(true, true);
let mut builder = builder.with_field(Field::new("list_item",
DataType::Int32, false));
@@ -377,9 +312,7 @@ mod tests {
}
#[test]
- #[should_panic(
- expected = "DataType of field (Int64) should be the same as the
values_builder DataType (Int32)"
- )]
+ #[should_panic(expected = "FixedSizeListArray expected data type Int64 got
Int32")]
fn test_fixed_size_list_array_builder_with_field_type_panic() {
let values_builder = Int32Builder::new();
let builder = FixedSizeListBuilder::new(values_builder, 3);
@@ -417,7 +350,7 @@ mod tests {
}
#[test]
- #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListBuilder field")]
+ #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListArray")]
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
let builder = make_list_builder(true, true);
let builder = builder.with_field(Field::new("list_item",
DataType::Int32, false));
@@ -439,9 +372,7 @@ mod tests {
}
#[test]
- #[should_panic(
- expected = "DataType of field (Int64) should be the same as the
values_builder DataType (Int32)"
- )]
+ #[should_panic(expected = "FixedSizeListArray expected data type Int64 got
Int32")]
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
let builder = make_list_builder(false, false);
let builder = builder.with_field(Field::new("list_item",
DataType::Int64, true));