This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 82a40f3edf Handle possible overflows in StringArrayBuilder /
LargeStringArrayBuilder (#13802)
82a40f3edf is described below
commit 82a40f3edf264834c28f3a1bf2a3143951802634
Author: wiedld <[email protected]>
AuthorDate: Wed Dec 18 03:11:40 2024 -0800
Handle possible overflows in StringArrayBuilder / LargeStringArrayBuilder
(#13802)
* test(13796): reproducer of overflow on capacity
* fix(13796): handle overflows with proper max capacity number which is
valid for MutableBuffer
* refactor: use simple solution and provide panic
---
datafusion/functions/src/strings.rs | 37 +++++++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/datafusion/functions/src/strings.rs
b/datafusion/functions/src/strings.rs
index caafbae6ba..a6587a91a9 100644
--- a/datafusion/functions/src/strings.rs
+++ b/datafusion/functions/src/strings.rs
@@ -124,8 +124,12 @@ pub struct StringArrayBuilder {
impl StringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
- let mut offsets_buffer =
- MutableBuffer::with_capacity((item_capacity + 1) *
size_of::<i32>());
+ let capacity = item_capacity
+ .checked_add(1)
+ .map(|i| i.saturating_mul(size_of::<i32>()))
+ .expect("capacity integer overflow");
+
+ let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed
the bounds.
unsafe { offsets_buffer.push_unchecked(0_i32) };
Self {
@@ -182,7 +186,7 @@ impl StringArrayBuilder {
.len()
.try_into()
.expect("byte array offset overflow");
- unsafe { self.offsets_buffer.push_unchecked(next_offset) };
+ self.offsets_buffer.push(next_offset);
}
/// Finalise the builder into a concrete [`StringArray`].
@@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder {
impl LargeStringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
- let mut offsets_buffer =
- MutableBuffer::with_capacity((item_capacity + 1) *
size_of::<i64>());
+ let capacity = item_capacity
+ .checked_add(1)
+ .map(|i| i.saturating_mul(size_of::<i64>()))
+ .expect("capacity integer overflow");
+
+ let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed
the bounds.
unsafe { offsets_buffer.push_unchecked(0_i64) };
Self {
@@ -347,7 +355,7 @@ impl LargeStringArrayBuilder {
.len()
.try_into()
.expect("byte array offset overflow");
- unsafe { self.offsets_buffer.push_unchecked(next_offset) };
+ self.offsets_buffer.push(next_offset);
}
/// Finalise the builder into a concrete [`LargeStringArray`].
@@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> {
}
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ #[should_panic(expected = "capacity integer overflow")]
+ fn test_overflow_string_array_builder() {
+ let _builder = StringArrayBuilder::with_capacity(usize::MAX,
usize::MAX);
+ }
+
+ #[test]
+ #[should_panic(expected = "capacity integer overflow")]
+ fn test_overflow_large_string_array_builder() {
+ let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX,
usize::MAX);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]