Jefffrey commented on code in PR #9876:
URL: https://github.com/apache/arrow-rs/pull/9876#discussion_r3186798686


##########
arrow-string/src/concat_elements.rs:
##########
@@ -168,6 +168,85 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
     Ok(unsafe { builder.build_unchecked() }.into())
 }
 
+/// Returns the elementwise concatenation of a [`FixedSizeBinaryArray`].
+///
+/// The result has `value_length = left.value_length() + right.value_length()`.
+/// An index is null if either input is null at that position.
+///
+/// An error will be returned if `left` and `right` have different lengths.
+pub fn concat_elements_fixed_size_binary(
+    left: &FixedSizeBinaryArray,
+    right: &FixedSizeBinaryArray,
+) -> Result<FixedSizeBinaryArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),
+            right.len()
+        )));
+    }
+
+    let left_size = left.value_length() as usize;
+    let right_size = right.value_length() as usize;
+    let output_size = left_size + right_size;
+
+    // Pre-compute combined null bitmap so the per-row NULL check is efficient
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), 
output_size as i32);
+    let mut buffer = MutableBuffer::with_capacity(output_size);
+    for i in 0..left.len() {
+        if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
+            result.append_null();
+        } else {
+            buffer.clear();
+            buffer.extend_from_slice(left.value(i));
+            buffer.extend_from_slice(right.value(i));
+            result.append_value(&buffer)?;
+        }
+    }
+
+    Ok(result.finish())
+}
+
+/// Concatenates two `BinaryViewArray`s element-wise.
+/// If either element is `Null`, the result element is also `Null`.
+///
+/// # Errors
+/// - Returns an error if the input arrays have different lengths.
+/// - Returns an error if any concatenated string exceeds `u32::MAX` in length.

Review Comment:
   nit: not strings, plus I believe its actually `i32::MAX` not `u32`



##########
arrow-string/src/concat_elements.rs:
##########
@@ -168,6 +168,85 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
     Ok(unsafe { builder.build_unchecked() }.into())
 }
 
+/// Returns the elementwise concatenation of a [`FixedSizeBinaryArray`].
+///
+/// The result has `value_length = left.value_length() + right.value_length()`.
+/// An index is null if either input is null at that position.
+///
+/// An error will be returned if `left` and `right` have different lengths.
+pub fn concat_elements_fixed_size_binary(
+    left: &FixedSizeBinaryArray,
+    right: &FixedSizeBinaryArray,
+) -> Result<FixedSizeBinaryArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),
+            right.len()
+        )));
+    }
+
+    let left_size = left.value_length() as usize;
+    let right_size = right.value_length() as usize;
+    let output_size = left_size + right_size;
+
+    // Pre-compute combined null bitmap so the per-row NULL check is efficient
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), 
output_size as i32);
+    let mut buffer = MutableBuffer::with_capacity(output_size);
+    for i in 0..left.len() {
+        if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
+            result.append_null();
+        } else {
+            buffer.clear();
+            buffer.extend_from_slice(left.value(i));
+            buffer.extend_from_slice(right.value(i));
+            result.append_value(&buffer)?;
+        }
+    }
+
+    Ok(result.finish())
+}
+
+/// Concatenates two `BinaryViewArray`s element-wise.
+/// If either element is `Null`, the result element is also `Null`.
+///
+/// # Errors
+/// - Returns an error if the input arrays have different lengths.
+/// - Returns an error if any concatenated string exceeds `u32::MAX` in length.
+pub fn concat_elements_binary_view_array(
+    left: &BinaryViewArray,
+    right: &BinaryViewArray,
+) -> Result<BinaryViewArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),

Review Comment:
   Should we make this generic over view arrays? Since it seems we'd still miss 
`Utf8View`



##########
arrow-string/src/concat_elements.rs:
##########
@@ -355,6 +450,89 @@ mod tests {
         assert_eq!(output, expected);
     }
 
+    #[test]
+    fn test_fixed_size_binary_concat() {

Review Comment:
   Can we have tests of concat fixedsizebinary's with different sizes, and also 
when they both have size = 0



##########
arrow-string/src/concat_elements.rs:
##########
@@ -185,22 +264,38 @@ pub fn concat_elements_dyn(left: &dyn Array, right: &dyn 
Array) -> Result<ArrayR
         (DataType::Utf8, DataType::Utf8) => {
             let left = left.as_any().downcast_ref::<StringArray>().unwrap();
             let right = right.as_any().downcast_ref::<StringArray>().unwrap();
-            Ok(Arc::new(concat_elements_utf8(left, right).unwrap()))
+            Ok(Arc::new(concat_elements_utf8(left, right)?))

Review Comment:
   Nice cleanup



##########
arrow-string/src/concat_elements.rs:
##########
@@ -168,6 +168,85 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
     Ok(unsafe { builder.build_unchecked() }.into())
 }
 
+/// Returns the elementwise concatenation of a [`FixedSizeBinaryArray`].
+///
+/// The result has `value_length = left.value_length() + right.value_length()`.
+/// An index is null if either input is null at that position.
+///
+/// An error will be returned if `left` and `right` have different lengths.
+pub fn concat_elements_fixed_size_binary(
+    left: &FixedSizeBinaryArray,
+    right: &FixedSizeBinaryArray,
+) -> Result<FixedSizeBinaryArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),
+            right.len()
+        )));
+    }
+
+    let left_size = left.value_length() as usize;
+    let right_size = right.value_length() as usize;
+    let output_size = left_size + right_size;
+
+    // Pre-compute combined null bitmap so the per-row NULL check is efficient
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), 
output_size as i32);
+    let mut buffer = MutableBuffer::with_capacity(output_size);

Review Comment:
   I wonder if we should try use interleave here for performance 🤔 
   
   Though since this is a new feature and we don't have benchmarks for it yet I 
don't think its strictly necessary in this PR, just a thought



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to