tustvold commented on code in PR #1720:
URL: https://github.com/apache/arrow-rs/pull/1720#discussion_r878850863


##########
arrow/src/compute/kernels/concat.rs:
##########
@@ -102,6 +102,25 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef> {
     Ok(make_array(mutable.freeze()))
 }
 
+// Elementwise concatenation of StringArrays
+pub fn string_concat<Offset: OffsetSizeTrait>(
+    left: &GenericStringArray<Offset>,
+    right: &GenericStringArray<Offset>,
+) -> Result<GenericStringArray<Offset>> {
+    let left_bitmap = left.data().null_bitmap().unwrap();
+    let right_bitmap = right.data().null_bitmap().unwrap();
+    let concat_bitmap = (left_bitmap & right_bitmap).unwrap();
+    Ok((0..left.len().max(right.len()))
+        .map(|i| {
+            if concat_bitmap.is_set(i) {
+                Some(left.value(i).to_owned() + right.value(i))
+            } else {
+                None
+            }
+        })
+        .collect::<GenericStringArray<Offset>>())

Review Comment:
   I think it would be **significantly** faster to do something like the following (not at all tested):
   
   ```
   // TODO: Handle non-zero offset in source ArrayData
   
   if left.len() != right.len() {
       return Err(...)
   }
   
   let nulls = match (left.data().null_bitmap(), right.data.null_bitmap()) {
     (Some(left), Some(right)) = Some((left & right)?)
     (Some(left), None) => Some(left),
     (None, Some(right)) => Some(right),
     (None, None) => None,
   };
   let left_offsets = left.value_offsets();
   let right_offsets = right.value_offsets();
   
   let left_values = left.value_data().as_slice();
   let right_values = right.value_data().as_slice();
   
   let left_iter = left_offsets.windows(2);
   let right_iter = right_offsets.windows(2);
   
   let mut output_offsets = BufferBuilder::<Offset>::new(left_offsets.len());
   let mut output_values = BufferBuilder::<u8>::new(left_data.len() + 
right_data.len());
   let mut cur_offset = 0;
   output_offsets.append(0);
   
   for (left, right) in left_iter.zip(right_iter) {
     let left_len = left[1] - left[0];
     let right_len = right[1] - right[0];
     cur_offset += left_len + right_len;
     output_offsets.append(cur_offset); // With checked case
     output_values.append_slice(&left_values[left[0]..left[1]]);
     output_values.append_slice(&right_values[right[0]..right[1]]);)
   }
   
   let mut builder = ArrayDataBuilder::new(Offset::get_data_type)
     .add_buffer(output_offsets.finish())
     .add_buffer(output_values.finish());
   
   if let Some(nulls) = nulls {
     builder = builder.null_bit_buffer(nulls);
   }
   
   unsafe {builder.build_unchecked()}
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to