This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 82a40f3edf Handle possible overflows in StringArrayBuilder / 
LargeStringArrayBuilder (#13802)
82a40f3edf is described below

commit 82a40f3edf264834c28f3a1bf2a3143951802634
Author: wiedld <[email protected]>
AuthorDate: Wed Dec 18 03:11:40 2024 -0800

    Handle possible overflows in StringArrayBuilder / LargeStringArrayBuilder 
(#13802)
    
    * test(13796): reproducer of overflow on capacity
    
    * fix(13796): handle overflows with proper max capacity number which is 
valid for MutableBuffer
    
    * refactor: use simple solution and provide panic
---
 datafusion/functions/src/strings.rs | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/datafusion/functions/src/strings.rs 
b/datafusion/functions/src/strings.rs
index caafbae6ba..a6587a91a9 100644
--- a/datafusion/functions/src/strings.rs
+++ b/datafusion/functions/src/strings.rs
@@ -124,8 +124,12 @@ pub struct StringArrayBuilder {
 
 impl StringArrayBuilder {
     pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
-        let mut offsets_buffer =
-            MutableBuffer::with_capacity((item_capacity + 1) * 
size_of::<i32>());
+        let capacity = item_capacity
+            .checked_add(1)
+            .map(|i| i.saturating_mul(size_of::<i32>()))
+            .expect("capacity integer overflow");
+
+        let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
         // SAFETY: the first offset value is definitely not going to exceed 
the bounds.
         unsafe { offsets_buffer.push_unchecked(0_i32) };
         Self {
@@ -182,7 +186,7 @@ impl StringArrayBuilder {
             .len()
             .try_into()
             .expect("byte array offset overflow");
-        unsafe { self.offsets_buffer.push_unchecked(next_offset) };
+        self.offsets_buffer.push(next_offset);
     }
 
     /// Finalise the builder into a concrete [`StringArray`].
@@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder {
 
 impl LargeStringArrayBuilder {
     pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
-        let mut offsets_buffer =
-            MutableBuffer::with_capacity((item_capacity + 1) * 
size_of::<i64>());
+        let capacity = item_capacity
+            .checked_add(1)
+            .map(|i| i.saturating_mul(size_of::<i64>()))
+            .expect("capacity integer overflow");
+
+        let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
         // SAFETY: the first offset value is definitely not going to exceed 
the bounds.
         unsafe { offsets_buffer.push_unchecked(0_i64) };
         Self {
@@ -347,7 +355,7 @@ impl LargeStringArrayBuilder {
             .len()
             .try_into()
             .expect("byte array offset overflow");
-        unsafe { self.offsets_buffer.push_unchecked(next_offset) };
+        self.offsets_buffer.push(next_offset);
     }
 
     /// Finalise the builder into a concrete [`LargeStringArray`].
@@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[should_panic(expected = "capacity integer overflow")]
+    fn test_overflow_string_array_builder() {
+        let _builder = StringArrayBuilder::with_capacity(usize::MAX, 
usize::MAX);
+    }
+
+    #[test]
+    #[should_panic(expected = "capacity integer overflow")]
+    fn test_overflow_large_string_array_builder() {
+        let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, 
usize::MAX);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to