This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f091cbbf0 Check overflow in MutableArrayData extend offsets (#3123) (#3157)
f091cbbf0 is described below
commit f091cbbf0a1f212fbe1bd4d63fa018e7a5c82ccc
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Nov 22 15:27:14 2022 +0000
Check overflow in MutableArrayData extend offsets (#3123) (#3157)
* Check overflow in MutableArrayData extend offsets (#3123)
* Update arrow-data/src/transform/utils.rs
Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-data/src/transform/list.rs | 6 ++++--
arrow-data/src/transform/utils.rs | 22 +++++++++++++++++++---
arrow-data/src/transform/variable_size.rs | 6 ++++--
3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs
index 2f14f2fb5..76a845958 100644
--- a/arrow-data/src/transform/list.rs
+++ b/arrow-data/src/transform/list.rs
@@ -21,9 +21,11 @@ use super::{
};
use crate::ArrayData;
use arrow_buffer::ArrowNativeType;
-use num::Integer;
+use num::{CheckedAdd, Integer};
-pub(super) fn build_extend<T: ArrowNativeType + Integer>(array: &ArrayData) -> Extend {
+pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
+ array: &ArrayData,
+) -> Extend {
let offsets = array.buffer::<T>(0);
if array.null_count() == 0 {
// fast case where we can copy regions without nullability checks
diff --git a/arrow-data/src/transform/utils.rs b/arrow-data/src/transform/utils.rs
index 6a4c240c9..b1e3388ba 100644
--- a/arrow-data/src/transform/utils.rs
+++ b/arrow-data/src/transform/utils.rs
@@ -16,7 +16,7 @@
// under the License.
use arrow_buffer::{bit_util, ArrowNativeType, MutableBuffer};
-use num::Integer;
+use num::{CheckedAdd, Integer};
/// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero.
#[inline]
@@ -27,7 +27,7 @@ pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) {
}
}
-pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
+pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>(
buffer: &mut MutableBuffer,
mut last_offset: T,
offsets: &[T],
@@ -36,7 +36,10 @@ pub(super) fn extend_offsets<T: ArrowNativeType + Integer>(
offsets.windows(2).for_each(|offsets| {
// compute the new offset
let length = offsets[1] - offsets[0];
- last_offset = last_offset + length;
+ // if you hit this appending to a StringArray / BinaryArray it is because you
+ // are trying to add more data than can fit into that type. Try breaking your data into
+ // smaller batches or using LargeStringArray / LargeBinaryArray
+ last_offset = last_offset.checked_add(&length).expect("offset overflow");
buffer.push(last_offset);
});
}
@@ -55,3 +58,16 @@ pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(
debug_assert!(prefix.is_empty() && suffix.is_empty());
*offsets.get_unchecked(offsets.len() - 1)
}
+
+#[cfg(test)]
+mod tests {
+ use crate::transform::utils::extend_offsets;
+ use arrow_buffer::MutableBuffer;
+
+ #[test]
+ #[should_panic(expected = "offset overflow")]
+ fn test_overflow() {
+ let mut buffer = MutableBuffer::new(10);
+ extend_offsets(&mut buffer, i32::MAX - 4, &[0, 5]);
+ }
+}
diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs
index 73c478318..ce62459ae 100644
--- a/arrow-data/src/transform/variable_size.rs
+++ b/arrow-data/src/transform/variable_size.rs
@@ -18,7 +18,7 @@
use crate::ArrayData;
use arrow_buffer::{ArrowNativeType, MutableBuffer};
use num::traits::AsPrimitive;
-use num::Integer;
+use num::{CheckedAdd, Integer};
use super::{
Extend, _MutableArrayData,
@@ -39,7 +39,9 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
buffer.extend_from_slice(new_values);
}
-pub(super) fn build_extend<T: ArrowNativeType + Integer + AsPrimitive<usize>>(
+pub(super) fn build_extend<
+ T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>,
+>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);