This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 7fdd0d8b1 Add concat_elements_bytes (#3798)
7fdd0d8b1 is described below
commit 7fdd0d8b1afe051c07cfdfb12c3d52a6d93e92b6
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sat Mar 4 10:13:00 2023 +0000
Add concat_elements_bytes (#3798)
---
arrow-string/src/concat_elements.rs | 109 +++++++++++++-----------------------
1 file changed, 40 insertions(+), 69 deletions(-)
diff --git a/arrow-string/src/concat_elements.rs
b/arrow-string/src/concat_elements.rs
index 4aa5a127c..1f85b4deb 100644
--- a/arrow-string/src/concat_elements.rs
+++ b/arrow-string/src/concat_elements.rs
@@ -18,29 +18,18 @@
use std::sync::Arc;
use arrow_array::builder::BufferBuilder;
+use arrow_array::types::ByteArrayType;
use arrow_array::*;
+use arrow_buffer::ArrowNativeType;
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{ArrowError, DataType};
-/// Returns the elementwise concatenation of a [`StringArray`].
-///
-/// An index of the resulting [`StringArray`] is null if any of
-/// `StringArray` are null at that location.
-///
-/// ```text
-/// e.g:
-///
-/// ["Hello"] + ["World"] = ["HelloWorld"]
-///
-/// ["a", "b"] + [None, "c"] = [None, "bc"]
-/// ```
-///
-/// An error will be returned if `left` and `right` have different lengths
-pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
- left: &GenericStringArray<Offset>,
- right: &GenericStringArray<Offset>,
-) -> Result<GenericStringArray<Offset>, ArrowError> {
+/// Returns the elementwise concatenation of a [`GenericByteArray`].
+pub fn concat_elements_bytes<T: ByteArrayType>(
+ left: &GenericByteArray<T>,
+ right: &GenericByteArray<T>,
+) -> Result<GenericByteArray<T>, ArrowError> {
if left.len() != right.len() {
return Err(ArrowError::ComputeError(format!(
"Arrays must have the same length: {} != {}",
@@ -63,18 +52,18 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
- right_offsets[0].as_usize(),
);
- let mut output_offsets = BufferBuilder::<Offset>::new(left_offsets.len());
- output_offsets.append(Offset::zero());
+ let mut output_offsets =
BufferBuilder::<T::Offset>::new(left_offsets.len());
+ output_offsets.append(T::Offset::usize_as(0));
for (left_idx, right_idx) in
left_offsets.windows(2).zip(right_offsets.windows(2)) {
output_values
.append_slice(&left_values[left_idx[0].as_usize()..left_idx[1].as_usize()]);
output_values.append_slice(
&right_values[right_idx[0].as_usize()..right_idx[1].as_usize()],
);
-
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
+
output_offsets.append(T::Offset::from_usize(output_values.len()).unwrap());
}
- let builder =
ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
+ let builder = ArrayDataBuilder::new(T::DATA_TYPE)
.len(left.len())
.add_buffer(output_offsets.finish())
.add_buffer(output_values.finish())
@@ -84,6 +73,35 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
Ok(unsafe { builder.build_unchecked() }.into())
}
+/// Returns the elementwise concatenation of a [`GenericStringArray`].
+///
+/// An index of the resulting [`GenericStringArray`] is null if any of
+/// `StringArray` are null at that location.
+///
+/// ```text
+/// e.g:
+///
+/// ["Hello"] + ["World"] = ["HelloWorld"]
+///
+/// ["a", "b"] + [None, "c"] = [None, "bc"]
+/// ```
+///
+/// An error will be returned if `left` and `right` have different lengths
+pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
+ left: &GenericStringArray<Offset>,
+ right: &GenericStringArray<Offset>,
+) -> Result<GenericStringArray<Offset>, ArrowError> {
+ concat_elements_bytes(left, right)
+}
+
+/// Returns the elementwise concatenation of a [`GenericBinaryArray`].
+pub fn concat_element_binary<Offset: OffsetSizeTrait>(
+ left: &GenericBinaryArray<Offset>,
+ right: &GenericBinaryArray<Offset>,
+) -> Result<GenericBinaryArray<Offset>, ArrowError> {
+ concat_elements_bytes(left, right)
+}
+
/// Returns the elementwise concatenation of [`StringArray`].
/// ```text
/// e.g:
@@ -158,53 +176,6 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
Ok(unsafe { builder.build_unchecked() }.into())
}
-pub fn concat_element_binary<Offset: OffsetSizeTrait>(
- left: &GenericBinaryArray<Offset>,
- right: &GenericBinaryArray<Offset>,
-) -> Result<GenericBinaryArray<Offset>, ArrowError> {
- if left.len() != right.len() {
- return Err(ArrowError::ComputeError(format!(
- "Arrays must have the same length: {} != {}",
- left.len(),
- right.len()
- )));
- }
-
- let output_bitmap = combine_option_bitmap(&[left.data(), right.data()],
left.len());
-
- let left_offsets = left.value_offsets();
- let right_offsets = right.value_offsets();
-
- let left_values = left.value_data();
- let right_values = right.value_data();
-
- let mut output_values = BufferBuilder::<u8>::new(
- left_values.len() + right_values.len()
- - left_offsets[0].as_usize()
- - right_offsets[0].as_usize(),
- );
-
- let mut output_offsets = BufferBuilder::<Offset>::new(left_offsets.len());
- output_offsets.append(Offset::zero());
- for (left_idx, right_idx) in
left_offsets.windows(2).zip(right_offsets.windows(2)) {
- output_values
-
.append_slice(&left_values[left_idx[0].as_usize()..left_idx[1].as_usize()]);
- output_values.append_slice(
- &right_values[right_idx[0].as_usize()..right_idx[1].as_usize()],
- );
-
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
- }
-
- let builder =
ArrayDataBuilder::new(GenericBinaryArray::<Offset>::DATA_TYPE)
- .len(left.len())
- .add_buffer(output_offsets.finish())
- .add_buffer(output_values.finish())
- .null_bit_buffer(output_bitmap);
-
- // SAFETY - offsets valid by construction
- Ok(unsafe { builder.build_unchecked() }.into())
-}
-
pub fn concat_elements_dyn(
left: &dyn Array,
right: &dyn Array,