tustvold commented on code in PR #1720:
URL: https://github.com/apache/arrow-rs/pull/1720#discussion_r878850863
##########
arrow/src/compute/kernels/concat.rs:
##########

@@ -102,6 +102,25 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef> {
     Ok(make_array(mutable.freeze()))
 }
+// Elementwise concatenation of StringArrays
+pub fn string_concat<Offset: OffsetSizeTrait>(
+    left: &GenericStringArray<Offset>,
+    right: &GenericStringArray<Offset>,
+) -> Result<GenericStringArray<Offset>> {
+    let left_bitmap = left.data().null_bitmap().unwrap();
+    let right_bitmap = right.data().null_bitmap().unwrap();
+    let concat_bitmap = (left_bitmap & right_bitmap).unwrap();
+    Ok((0..left.len().max(right.len()))
+        .map(|i| {
+            if concat_bitmap.is_set(i) {
+                Some(left.value(i).to_owned() + right.value(i))
+            } else {
+                None
+            }
+        })
+        .collect::<GenericStringArray<Offset>>())

Review Comment:
I think it would be **significantly** faster to do something like (not at all tested)

```
// TODO: Handle non-zero offset in source ArrayData

if left.len() != right.len() {
    return Err(...)
}

let nulls = match (left.data().null_bitmap(), right.data.null_bitmap()) {
    (Some(left), Some(right)) = Some((left & right)?)
    (Some(left), None) => Some(left),
    (None, Some(right)) => Some(right),
    (None, None) => None,
};

let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();

let left_values = left.value_data().as_slice();
let right_values = right.value_data().as_slice();

let left_iter = left_offsets.windows(2);
let right_iter = right_offsets.windows(2);

let mut output_offsets = BufferBuilder::<Offset>::new(left_offsets.len());
let mut output_values = BufferBuilder::<u8>::new(left_data.len() + right_data.len());
let mut cur_offset = 0;
output_offsets.append(0);

for (left, right) in left_iter.zip(right_iter) {
    let left_len = left[1] - left[0];
    let right_len = right[1] - right[0];
    cur_offset += left_len + right_len;
    output_offsets.append(cur_offset);

    // With checked case
    output_values.append_slice(&left_values[left[0]..left[1]]);
    output_values.append_slice(&right_values[right[0]..right[1]]);)
}

let mut builder = ArrayDataBuilder::new(Offset::get_data_type)
    .add_buffer(output_offsets.finish())
    .add_buffer(output_values.finish());

if let Some(nulls) = nulls {
    builder = builder.null_bit_buffer(nulls);
}

unsafe {builder.build_unchecked()}
```

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org