This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push: new e54b72bc4d fix: Do not add null buffer for `NullArray` in MutableArrayData (#7726) e54b72bc4d is described below commit e54b72bc4d2e56d28f849546fe4e19fafc8ecb8c Author: Oleks V <comph...@users.noreply.github.com> AuthorDate: Sun Jun 22 05:09:05 2025 -0700 fix: Do not add null buffer for `NullArray` in MutableArrayData (#7726) # Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Closes #7725. # Rationale for this change Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. Do not populate null buffers when building `MutableArrayData` for `NullArray` # What changes are included in this PR? There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. # Are there any user-facing changes? If there are user-facing changes then we may require documentation to be updated before approving the PR. If there are any breaking changes to public APIs, please call them out. --- arrow-array/src/array/null_array.rs | 31 +++++++++++++++++++++++++++++++ arrow-data/src/transform/mod.rs | 4 ++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs index 9a7a5ebe17..2dd9570a0e 100644 --- a/arrow-array/src/array/null_array.rs +++ b/arrow-array/src/array/null_array.rs @@ -170,6 +170,9 @@ impl std::fmt::Debug for NullArray { #[cfg(test)] mod tests { use super::*; + use crate::{make_array, Int64Array, StructArray}; + use arrow_data::transform::MutableArrayData; + use arrow_schema::Field; #[test] fn test_null_array() { @@ -201,4 +204,32 @@ mod tests { let array = NullArray::new(1024 * 1024); assert_eq!(format!("{array:?}"), "NullArray(1048576)"); } + + #[test] + fn test_null_array_with_parent_null_buffer() { + let null_array = NullArray::new(1); + let int_array = Int64Array::from(vec![42]); + + let fields = vec![ + Field::new("a", DataType::Int64, true), + Field::new("b", DataType::Null, true), + ]; + + let struct_array_data = ArrayData::builder(DataType::Struct(fields.into())) + .len(1) + .add_child_data(int_array.to_data()) + .add_child_data(null_array.to_data()) + .build() + .unwrap(); + + let mut mutable = MutableArrayData::new(vec![&struct_array_data], true, 1); + + // Simulate a NULL value in the parent array, for instance, if array being queried by + // invalid index + mutable.extend_nulls(1); + let data = mutable.freeze(); + + let struct_array = Arc::new(StructArray::from(data.clone())); + assert!(make_array(data) == struct_array); + } } diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index ae550f24a2..af0e1c104f 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -798,8 +798,8 @@ impl<'a> MutableArrayData<'a> { }; let nulls = match data.data_type { - // RunEndEncoded arrays cannot have top-level null bitmasks - DataType::RunEndEncoded(_, _) => None, + // RunEndEncoded and Null arrays cannot have top-level null bitmasks + DataType::RunEndEncoded(_, _) | DataType::Null => None, _ => data .null_buffer .map(|nulls| {