mapleFU commented on issue #8247:
URL: https://github.com/apache/arrow-rs/issues/8247#issuecomment-3237900474

   This can be reproduced on the main branch. Here is a minor patch that fixes it:
   
   ```
   diff --git a/arrow-array/src/array/list_array.rs 
b/arrow-array/src/array/list_array.rs
   index 8836b5b0f..363bf4a24 100644
   --- a/arrow-array/src/array/list_array.rs
   +++ b/arrow-array/src/array/list_array.rs
   @@ -24,7 +24,7 @@ use crate::{
    use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
    use arrow_data::{ArrayData, ArrayDataBuilder};
    use arrow_schema::{ArrowError, DataType, FieldRef};
   -use num::Integer;
   +use num::{CheckedAdd, Integer};
    use std::any::Any;
    use std::sync::Arc;
    
   @@ -37,7 +37,7 @@ use std::sync::Arc;
    /// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
    /// [`StringArray`]: crate::array::StringArray
    /// [`LargeStringArray`]: crate::array::LargeStringArray
   -pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
   +pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer 
+ CheckedAdd {
        /// True for 64 bit offset size and false for 32 bit offset size
        const IS_LARGE: bool;
        /// Prefix for the offset size
   diff --git a/arrow-array/src/builder/generic_bytes_builder.rs 
b/arrow-array/src/builder/generic_bytes_builder.rs
   index 6f8cf9de4..ef8fa9003 100644
   --- a/arrow-array/src/builder/generic_bytes_builder.rs
   +++ b/arrow-array/src/builder/generic_bytes_builder.rs
   @@ -20,9 +20,10 @@ use crate::types::{ByteArrayType, GenericBinaryType, 
GenericStringType};
    use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
    use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, 
NullBufferBuilder, ScalarBuffer};
    use arrow_data::ArrayDataBuilder;
   +use arrow_schema::ArrowError;
   +use log::{info, log};
    use std::any::Any;
    use std::sync::Arc;
   -use log::{info, log};
    
    /// Builder for [`GenericByteArray`]
    ///
   @@ -143,9 +144,11 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
        /// Appends array values and null to this builder as is
        /// (this means that underlying null values are copied as is).
        #[inline]
   -    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
   +    pub fn append_array(&mut self, array: &GenericByteArray<T>) -> 
Result<(), ArrowError> {
   +        use num::CheckedAdd;
   +
            if array.len() == 0 {
   -            return;
   +            return Ok(());
            }
    
            let offsets = array.offsets();
   @@ -156,7 +159,11 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
                self.offsets_builder.extend_from_slice(&offsets[1..]);
            } else {
                // Shifting all the offsets
   -            log::info!("next_offset: {:?}, offsets[0]: {:?}", 
self.next_offset(), offsets[0]);
   +            log::info!(
   +                "next_offset: {:?}, offsets[0]: {:?}",
   +                self.next_offset(),
   +                offsets[0]
   +            );
                let shift: T::Offset = self.next_offset() - offsets[0];
    
                // Creating intermediate offsets instead of pushing each offset 
is faster
   @@ -164,11 +171,31 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
                //  and reserve the necessary capacity, it's still slower)
                let mut intermediate = Vec::with_capacity(offsets.len() - 1);
    
   +            if self
   +                .next_offset()
   +                .checked_add(&(offsets[offsets.len() - 1] - offsets[0]))
   +                .is_some()
   +            {
   +                info!(
   +                    "next_offset: {:?}, offsets[0]: {:?}",
   +                    self.next_offset(),
   +                    offsets[0]
   +                );
   +                return Err(ArrowError::OffsetOverflowError(
   +                    self.next_offset().as_usize() + offsets[offsets.len() - 
1].as_usize()
   +                        - offsets[0].as_usize(),
   +                ));
   +            }
   +
                for &offset in &offsets[1..] {
                    intermediate.push(offset + shift)
                }
                if intermediate.len() != 0 {
   -                info!("intermediate back and first: {:?} - {:?}", 
intermediate[0], intermediate[intermediate.len() - 1]);
   +                info!(
   +                    "intermediate back and first: {:?} - {:?}",
   +                    intermediate[0],
   +                    intermediate[intermediate.len() - 1]
   +                );
                }
    
                self.offsets_builder.extend_from_slice(&intermediate);
   @@ -184,13 +211,17 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
            } else {
                self.null_buffer_builder.append_n_non_nulls(array.len());
            }
   +        Ok(())
        }
    
        /// Builds the [`GenericByteArray`] and reset this builder.
        pub fn finish(&mut self) -> GenericByteArray<T> {
            let array_type = T::DATA_TYPE;
            if !self.offsets_builder.is_empty() {
   -            info!("final offsets: {:?}", 
self.offsets_builder[self.offsets_builder.len() - 1]);
   +            info!(
   +                "final offsets: {:?}",
   +                self.offsets_builder[self.offsets_builder.len() - 1]
   +            );
            }
            let array_builder = ArrayDataBuilder::new(array_type)
                .len(self.len())
   @@ -682,9 +713,9 @@ mod tests {
            let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
    
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&arr1);
   -        builder.append_array(&arr2);
   -        builder.append_array(&arr3);
   +        builder.append_array(&arr1).unwrap();
   +        builder.append_array(&arr2).unwrap();
   +        builder.append_array(&arr3).unwrap();
    
            let actual = builder.finish();
            let expected = GenericStringArray::<i32>::from(input);
   @@ -712,9 +743,9 @@ mod tests {
            let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
    
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&arr1);
   -        builder.append_array(&arr2);
   -        builder.append_array(&arr3);
   +        builder.append_array(&arr1).unwrap();
   +        builder.append_array(&arr2).unwrap();
   +        builder.append_array(&arr3).unwrap();
    
            let actual = builder.finish();
            let expected = GenericStringArray::<i32>::from(input);
   @@ -726,7 +757,7 @@ mod tests {
        fn test_append_empty_array() {
            let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&arr);
   +        builder.append_array(&arr).unwrap();
            let result = builder.finish();
            assert_eq!(result.len(), 0);
        }
   @@ -753,7 +784,7 @@ mod tests {
            assert_ne!(sliced.offsets().last(), full_array.offsets().last());
    
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&sliced);
   +        builder.append_array(&sliced).unwrap();
            let actual = builder.finish();
    
            let expected = GenericStringArray::<i32>::from(vec![None, 
Some("how"), None, None]);
   @@ -789,8 +820,8 @@ mod tests {
            };
    
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&input_1_array_with_nulls);
   -        builder.append_array(&input_2_array_with_nulls);
   +        builder.append_array(&input_1_array_with_nulls).unwrap();
   +        builder.append_array(&input_2_array_with_nulls).unwrap();
    
            let actual = builder.finish();
            let expected = GenericStringArray::<i32>::from(vec![
   @@ -836,9 +867,9 @@ mod tests {
            let slice3 = full_array.slice(7, full_array.len() - 7);
    
            let mut builder = GenericStringBuilder::<i32>::new();
   -        builder.append_array(&slice1);
   -        builder.append_array(&slice2);
   -        builder.append_array(&slice3);
   +        builder.append_array(&slice1).unwrap();
   +        builder.append_array(&slice2).unwrap();
   +        builder.append_array(&slice3).unwrap();
    
            let actual = builder.finish();
    
   diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
   index 144de8dbe..6ea33dff1 100644
   --- a/arrow-array/src/types.rs
   +++ b/arrow-array/src/types.rs
   @@ -1590,7 +1590,7 @@ pub(crate) mod bytes {
    pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed 
{
        /// Type of offset i.e i32/i64
        type Offset: OffsetSizeTrait;
   -    /// Type for representing its equivalent rust type i.e
   +    /// Type for representing its equivalent rust type i.e.,
        /// Utf8Array will have native type has &str
        /// BinaryArray will have type as [u8]
        type Native: bytes::ByteArrayNativeType + AsRef<Self::Native> + 
AsRef<[u8]> + ?Sized;
   diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
   index bd9365005..6d9aff395 100644
   --- a/arrow-select/src/concat.rs
   +++ b/arrow-select/src/concat.rs
   @@ -236,7 +236,7 @@ fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) 
-> Result<ArrayRef, Arr
        let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, 
bytes_capacity);
    
        for array in arrays {
   -        builder.append_array(array.as_bytes::<T>());
   +        builder.append_array(array.as_bytes::<T>())?;
        }
    
        Ok(Arc::new(builder.finish()))
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to