alamb commented on code in PR #10161:
URL: https://github.com/apache/arrow-rs/pull/10161#discussion_r3462582904
##########
arrow-buffer/src/buffer/null.rs:
##########
@@ -78,9 +78,11 @@ impl NullBuffer {
/// can yield significant performance improvements over an iterator
approach
pub fn union(lhs: Option<&NullBuffer>, rhs: Option<&NullBuffer>) ->
Option<NullBuffer> {
match (lhs, rhs) {
- (Some(lhs), Some(rhs)) => Some(Self::new(lhs.inner() &
rhs.inner())),
- (Some(n), None) | (None, Some(n)) => Some(n.clone()),
- (None, None) => None,
+ (Some(lhs), Some(rhs)) if lhs.null_count() > 0 || rhs.null_count()
> 0 => {
Review Comment:
this is a nice bonus optimization
##########
arrow-string/src/concat_elements.rs:
##########
@@ -221,6 +221,158 @@ pub fn concat_elements_fixed_size_binary(
Ok(result.finish())
}
+struct ConcatByteViewBuilder<T>
+where
+ T: ByteViewType,
+{
+ views: Vec<u128>,
+ data: Vec<u8>,
+ inline: Vec<u8>,
+ phantom: PhantomData<T>,
Review Comment:
you couuld potentially avoid the generic bounds on the struct and only make
`finish` generic
However, it is not clear to me that is all that much ebtter
```diff
diff --git a/arrow-string/src/concat_elements.rs
b/arrow-string/src/concat_elements.rs
index 0739c804919..323b7883b7b 100644
--- a/arrow-string/src/concat_elements.rs
+++ b/arrow-string/src/concat_elements.rs
@@ -17,7 +17,6 @@
//! Provides utility functions for concatenation of elements in arrays.
-use std::marker::PhantomData;
use std::sync::Arc;
use arrow_array::builder::{BufferBuilder, FixedSizeBinaryBuilder,
make_view};
@@ -221,25 +220,21 @@ pub fn concat_elements_fixed_size_binary(
Ok(result.finish())
}
-struct ConcatByteViewBuilder<T>
-where
- T: ByteViewType,
-{
+struct ConcatByteViewBuilder {
views: Vec<u128>,
data: Vec<u8>,
inline: Vec<u8>,
- phantom: PhantomData<T>,
}
-impl<T> ConcatByteViewBuilder<T>
-where
- T: ByteViewType,
-{
+impl ConcatByteViewBuilder {
/// Returns the elementwise concatenation of two
[`GenericByteViewArray`]s.
- fn concat_elements_view_array(
+ fn concat_elements_view_array<T>(
left: &GenericByteViewArray<T>,
right: &GenericByteViewArray<T>,
- ) -> Result<GenericByteViewArray<T>, ArrowError> {
+ ) -> Result<GenericByteViewArray<T>, ArrowError>
+ where
+ T: ByteViewType,
+ {
let len = left.len();
if len != right.len() {
return Err(ArrowError::ComputeError(format!(
@@ -304,7 +299,6 @@ where
views: Vec::with_capacity(item_capacity),
data: Vec::with_capacity(data_capacity),
inline: Vec::with_capacity(MAX_INLINE_VIEW_LEN as usize),
- phantom: PhantomData,
}
}
@@ -339,10 +333,13 @@ where
self.views.push(0);
}
- fn finish(
+ fn finish<T>(
self,
null_buffer: Option<NullBuffer>,
- ) -> Result<GenericByteViewArray<T>, ArrowError> {
+ ) -> Result<GenericByteViewArray<T>, ArrowError>
+ where
+ T: ByteViewType,
+ {
if let Some(ref nulls) = null_buffer {
if nulls.len() != self.views.len() {
return Err(ArrowError::ComputeError(format!(
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]