This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new bd2c975df8 fix: add `null_buffer` length check to
`StringArrayBuilder`/`LargeStringArrayBuilder` (#13758)
bd2c975df8 is described below
commit bd2c975df88c9e95f2f6b5f2abed5638904e2ac0
Author: Jack <[email protected]>
AuthorDate: Sun Dec 15 21:38:29 2024 +0000
fix: add `null_buffer` length check to
`StringArrayBuilder`/`LargeStringArrayBuilder` (#13758)
* fix: add `null_buffer` check for `LargeStringArray`
Add a safety check to ensure that the alignment of buffers cannot be
overflowed. This introduces a panic if they are not aligned through a
runtime assertion.
* fix: remove value_buffer assertion
These buffers can be misaligned and it is not problematic, it is the
`null_buffer` which we care about being of the same length.
* feat: add `null_buffer` check to `StringArray`
This is in a similar vein to `LargeStringArray`, as the code is the
same, except for `i32`'s instead of `i64`.
* feat: use `row_count` var to avoid drift
---
datafusion/functions/src/strings.rs | 34 ++++++++++++++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git a/datafusion/functions/src/strings.rs
b/datafusion/functions/src/strings.rs
index d2fb5d5851..caafbae6ba 100644
--- a/datafusion/functions/src/strings.rs
+++ b/datafusion/functions/src/strings.rs
@@ -185,9 +185,24 @@ impl StringArrayBuilder {
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
}
+ /// Finalise the builder into a concrete [`StringArray`].
+ ///
+ /// # Panics
+ ///
+ /// This method can panic when:
+ ///
+ /// - the provided `null_buffer` is not the same length as the
`offsets_buffer`.
pub fn finish(self, null_buffer: Option<NullBuffer>) -> StringArray {
+ let row_count = self.offsets_buffer.len() / size_of::<i32>() - 1;
+ if let Some(ref null_buffer) = null_buffer {
+ assert_eq!(
+ null_buffer.len(),
+ row_count,
+ "Null buffer and offsets buffer must be the same length"
+ );
+ }
let array_builder = ArrayDataBuilder::new(DataType::Utf8)
- .len(self.offsets_buffer.len() / size_of::<i32>() - 1)
+ .len(row_count)
.add_buffer(self.offsets_buffer.into())
.add_buffer(self.value_buffer.into())
.nulls(null_buffer);
@@ -335,9 +350,24 @@ impl LargeStringArrayBuilder {
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
}
+ /// Finalise the builder into a concrete [`LargeStringArray`].
+ ///
+ /// # Panics
+ ///
+ /// This method can panic when:
+ ///
+ /// - the provided `null_buffer` is not the same length as the
`offsets_buffer`.
pub fn finish(self, null_buffer: Option<NullBuffer>) -> LargeStringArray {
+ let row_count = self.offsets_buffer.len() / size_of::<i64>() - 1;
+ if let Some(ref null_buffer) = null_buffer {
+ assert_eq!(
+ null_buffer.len(),
+ row_count,
+ "Null buffer and offsets buffer must be the same length"
+ );
+ }
let array_builder = ArrayDataBuilder::new(DataType::LargeUtf8)
- .len(self.offsets_buffer.len() / size_of::<i64>() - 1)
+ .len(row_count)
.add_buffer(self.offsets_buffer.into())
.add_buffer(self.value_buffer.into())
.nulls(null_buffer);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]