alamb commented on code in PR #7309:
URL: https://github.com/apache/arrow-rs/pull/7309#discussion_r2018967683


##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -129,6 +129,48 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         self.offsets_builder.append(self.next_offset());
     }
 
+    /// Appends array values and null to this builder as is
+    /// (this means that underlying null values are copied as is).
+    #[inline]
+    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
+        if array.len() == 0 {
+            return;
+        }
+
+        let offsets = array.offsets();
+
+        // If the offsets are contiguous, we can append them directly avoiding 
the need to align

Review Comment:
   when would this case happen (contiguous offsets)? It seems like it would 
only happen if you are `concat`ing two slices of the same original underlying 
array 🤔 



##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -129,6 +129,48 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         self.offsets_builder.append(self.next_offset());
     }
 
+    /// Appends array values and null to this builder as is
+    /// (this means that underlying null values are copied as is).
+    #[inline]
+    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
+        if array.len() == 0 {
+            return;
+        }
+
+        let offsets = array.offsets();
+
+        // If the offsets are contiguous, we can append them directly avoiding 
the need to align
+        // them
+        if self.next_offset() == offsets[0] {
+            self.offsets_builder.append_slice(&offsets[1..]);
+        } else {
+            // Shifting all the offsets
+            let shift: T::Offset = self.next_offset() - offsets[0];
+
+            // Creating intermediate offsets instead of pushing each offset is 
faster
+            // (even if we make MutableBuffer to avoid updating length on each 
push
+            //  and reserve the necessary capacity, it's still slower)
+            let mut intermediate = Vec::with_capacity(offsets.len() - 1);

Review Comment:
   🤔  maybe we need something like `extend_from_iter` in offsets builder (not 
for this PR)



##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -593,4 +635,101 @@ mod tests {
             &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
         )
     }
+
+    #[test]
+    fn test_append_array_without_nulls() {
+        let input = vec![
+            "hello", "world", "how", "are", "you", "doing", "today", "I", 
"am", "doing", "well",
+            "thank", "you", "for", "asking",
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_array_with_nulls() {
+        let input = vec![
+            Some("hello"),
+            None,
+            Some("how"),
+            None,
+            None,
+            None,
+            None,
+            Some("I"),
+            Some("am"),
+            Some("doing"),
+            Some("well"),
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_empty_array() {
+        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr);
+        let result = builder.finish();
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_append_array_with_offset_not_starting_at_0() {
+        let input = vec![
+            Some("hello"),
+            None,
+            Some("how"),
+            None,
+            None,
+            None,
+            None,
+            Some("I"),
+            Some("am"),
+            Some("doing"),
+            Some("well"),
+        ];
+        let full_array = GenericStringArray::<i32>::from(input);
+        let sliced = full_array.slice(1, 4);
+
+        assert_ne!(sliced.offsets()[0].as_usize(), 0);
+        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&sliced);
+        let actual = builder.finish();
+
+        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), 
None, None]);
+
+        println!("actual: {:?}", actual);
+        println!("expected: {:?}", expected);
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_underlying_null_values_added_as_is() {
+        // TODO

Review Comment:
   Do you meant to leave this as a TODO?



##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -593,4 +635,101 @@ mod tests {
             &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
         )
     }
+
+    #[test]
+    fn test_append_array_without_nulls() {
+        let input = vec![
+            "hello", "world", "how", "are", "you", "doing", "today", "I", 
"am", "doing", "well",
+            "thank", "you", "for", "asking",
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_array_with_nulls() {
+        let input = vec![
+            Some("hello"),
+            None,
+            Some("how"),
+            None,
+            None,
+            None,
+            None,
+            Some("I"),
+            Some("am"),
+            Some("doing"),
+            Some("well"),
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_empty_array() {
+        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr);
+        let result = builder.finish();
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_append_array_with_offset_not_starting_at_0() {
+        let input = vec![
+            Some("hello"),
+            None,
+            Some("how"),
+            None,
+            None,
+            None,
+            None,
+            Some("I"),
+            Some("am"),
+            Some("doing"),
+            Some("well"),
+        ];
+        let full_array = GenericStringArray::<i32>::from(input);
+        let sliced = full_array.slice(1, 4);
+
+        assert_ne!(sliced.offsets()[0].as_usize(), 0);
+        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&sliced);
+        let actual = builder.finish();
+
+        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), 
None, None]);
+
+        println!("actual: {:?}", actual);
+        println!("expected: {:?}", expected);
+        assert_eq!(actual, expected);

Review Comment:
   Minor but you can avoid printing the results except if the assert fails like
   
   ```suggestion
           assert_eq!(actual, expected, "actual: {actual:?}\nexpected: 
{expected:?}");
   ```



##########
arrow/benches/concatenate_kernel.rs:
##########
@@ -47,6 +47,26 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_concat(&v1, &v2))
     });
 
+    {

Review Comment:
   As you added code for boolean and Utf8 arrays, can you also please add 
benchmarks for those cases too?
   
   It would be really nice (and faster to review/merge) if you added the new 
benchmarks in a separate PR



##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -593,4 +635,101 @@ mod tests {
             &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
         )
     }
+
+    #[test]
+    fn test_append_array_without_nulls() {
+        let input = vec![
+            "hello", "world", "how", "are", "you", "doing", "today", "I", 
"am", "doing", "well",
+            "thank", "you", "for", "asking",
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_array_with_nulls() {
+        let input = vec![
+            Some("hello"),
+            None,
+            Some("how"),
+            None,
+            None,
+            None,
+            None,
+            Some("I"),
+            Some("am"),
+            Some("doing"),
+            Some("well"),
+        ];
+        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
+        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
+        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
+
+        let mut builder = GenericStringBuilder::<i32>::new();
+        builder.append_array(&arr1);
+        builder.append_array(&arr2);
+        builder.append_array(&arr3);
+
+        let actual = builder.finish();
+        let expected = GenericStringArray::<i32>::from(input);
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_append_empty_array() {

Review Comment:
   Could you please also add a test where the offsets are contiguous? Maybe I 
missed it. For example, I think if you sliced some data off the end of an array 
and then tried to append it back



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to