This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f091cbbf0 Check overflow in MutableArrayData extend offsets (#3123) (#3157)
f091cbbf0 is described below

commit f091cbbf0a1f212fbe1bd4d63fa018e7a5c82ccc
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Nov 22 15:27:14 2022 +0000

    Check overflow in MutableArrayData extend offsets (#3123) (#3157)
    
    * Check overflow in MutableArrayData extend offsets (#3123)
    
    * Update arrow-data/src/transform/utils.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-data/src/transform/list.rs          |  6 ++++--
 arrow-data/src/transform/utils.rs         | 22 +++++++++++++++++++---
 arrow-data/src/transform/variable_size.rs |  6 ++++--
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs
index 2f14f2fb5..76a845958 100644
--- a/arrow-data/src/transform/list.rs
+++ b/arrow-data/src/transform/list.rs
@@ -21,9 +21,11 @@ use super::{
 };
 use crate::ArrayData;
 use arrow_buffer::ArrowNativeType;
-use num::Integer;
+use num::{CheckedAdd, Integer};
 
-pub(super) fn build_extend<T: ArrowNativeType + Integer>(array: &ArrayData) -> Extend {
+pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
+    array: &ArrayData,
+) -> Extend {
     let offsets = array.buffer::<T>(0);
     if array.null_count() == 0 {
         // fast case where we can copy regions without nullability checks
diff --git a/arrow-data/src/transform/utils.rs b/arrow-data/src/transform/utils.rs
index 6a4c240c9..b1e3388ba 100644
--- a/arrow-data/src/transform/utils.rs
+++ b/arrow-data/src/transform/utils.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use arrow_buffer::{bit_util, ArrowNativeType, MutableBuffer};
-use num::Integer;
+use num::{CheckedAdd, Integer};
 
 /// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero.
 #[inline]
@@ -27,7 +27,7 @@ pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) {
     }
 }
 
-pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
+pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>(
     buffer: &mut MutableBuffer,
     mut last_offset: T,
     offsets: &[T],
@@ -36,7 +36,10 @@ pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
     offsets.windows(2).for_each(|offsets| {
         // compute the new offset
         let length = offsets[1] - offsets[0];
-        last_offset = last_offset + length;
+        // if you hit this appending to a StringArray / BinaryArray it is because you
+        // are trying to add more data than can fit into that type. Try breaking your data into
+        // smaller batches or using LargeStringArray / LargeBinaryArray
+        last_offset = last_offset.checked_add(&length).expect("offset overflow");
         buffer.push(last_offset);
     });
 }
@@ -55,3 +58,16 @@ pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(
     debug_assert!(prefix.is_empty() && suffix.is_empty());
     *offsets.get_unchecked(offsets.len() - 1)
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::transform::utils::extend_offsets;
+    use arrow_buffer::MutableBuffer;
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn test_overflow() {
+        let mut buffer = MutableBuffer::new(10);
+        extend_offsets(&mut buffer, i32::MAX - 4, &[0, 5]);
+    }
+}
diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs
index 73c478318..ce62459ae 100644
--- a/arrow-data/src/transform/variable_size.rs
+++ b/arrow-data/src/transform/variable_size.rs
@@ -18,7 +18,7 @@
 use crate::ArrayData;
 use arrow_buffer::{ArrowNativeType, MutableBuffer};
 use num::traits::AsPrimitive;
-use num::Integer;
+use num::{CheckedAdd, Integer};
 
 use super::{
     Extend, _MutableArrayData,
@@ -39,7 +39,9 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
     buffer.extend_from_slice(new_values);
 }
 
-pub(super) fn build_extend<T: ArrowNativeType + Integer + AsPrimitive<usize>>(
+pub(super) fn build_extend<
+    T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>,
+>(
     array: &ArrayData,
 ) -> Extend {
     let offsets = array.buffer::<T>(0);

Reply via email to