This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bd2c975df8 fix: add `null_buffer` length check to 
`StringArrayBuilder`/`LargeStringArrayBuilder` (#13758)
bd2c975df8 is described below

commit bd2c975df88c9e95f2f6b5f2abed5638904e2ac0
Author: Jack <[email protected]>
AuthorDate: Sun Dec 15 21:38:29 2024 +0000

    fix: add `null_buffer` length check to 
`StringArrayBuilder`/`LargeStringArrayBuilder` (#13758)
    
    * fix: add `null_buffer` check for `LargeStringArray`
    
    Add a safety check to ensure that the alignment of buffers cannot be
    overflowed. This introduces a panic if they are not aligned through a
    runtime assertion.
    
    * fix: remove value_buffer assertion
    
    These buffers can be misaligned and it is not problematic, it is the
    `null_buffer` which we care about being of the same length.
    
    * feat: add `null_buffer` check to `StringArray`
    
    This is in a similar vein to `LargeStringArray`, as the code is the
    same, except for `i32`'s instead of `i64`.
    
    * feat: use `row_count` var to avoid drift
---
 datafusion/functions/src/strings.rs | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/datafusion/functions/src/strings.rs 
b/datafusion/functions/src/strings.rs
index d2fb5d5851..caafbae6ba 100644
--- a/datafusion/functions/src/strings.rs
+++ b/datafusion/functions/src/strings.rs
@@ -185,9 +185,24 @@ impl StringArrayBuilder {
         unsafe { self.offsets_buffer.push_unchecked(next_offset) };
     }
 
+    /// Finalise the builder into a concrete [`StringArray`].
+    ///
+    /// # Panics
+    ///
+    /// This method can panic when:
+    ///
+    /// - the provided `null_buffer` is not the same length as the 
`offsets_buffer`.
     pub fn finish(self, null_buffer: Option<NullBuffer>) -> StringArray {
+        let row_count = self.offsets_buffer.len() / size_of::<i32>() - 1;
+        if let Some(ref null_buffer) = null_buffer {
+            assert_eq!(
+                null_buffer.len(),
+                row_count,
+                "Null buffer and offsets buffer must be the same length"
+            );
+        }
         let array_builder = ArrayDataBuilder::new(DataType::Utf8)
-            .len(self.offsets_buffer.len() / size_of::<i32>() - 1)
+            .len(row_count)
             .add_buffer(self.offsets_buffer.into())
             .add_buffer(self.value_buffer.into())
             .nulls(null_buffer);
@@ -335,9 +350,24 @@ impl LargeStringArrayBuilder {
         unsafe { self.offsets_buffer.push_unchecked(next_offset) };
     }
 
+    /// Finalise the builder into a concrete [`LargeStringArray`].
+    ///
+    /// # Panics
+    ///
+    /// This method can panic when:
+    ///
+    /// - the provided `null_buffer` is not the same length as the 
`offsets_buffer`.
     pub fn finish(self, null_buffer: Option<NullBuffer>) -> LargeStringArray {
+        let row_count = self.offsets_buffer.len() / size_of::<i64>() - 1;
+        if let Some(ref null_buffer) = null_buffer {
+            assert_eq!(
+                null_buffer.len(),
+                row_count,
+                "Null buffer and offsets buffer must be the same length"
+            );
+        }
         let array_builder = ArrayDataBuilder::new(DataType::LargeUtf8)
-            .len(self.offsets_buffer.len() / size_of::<i64>() - 1)
+            .len(row_count)
             .add_buffer(self.offsets_buffer.into())
             .add_buffer(self.value_buffer.into())
             .nulls(null_buffer);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to