tustvold commented on code in PR #1781:
URL: https://github.com/apache/arrow-rs/pull/1781#discussion_r889358859
##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
/// This function is useful when implementing operations on higher level
arrays.
#[allow(clippy::unnecessary_wraps)]
pub(super) fn combine_option_bitmap(
- left_data: &ArrayData,
- right_data: &ArrayData,
+ arrays: &[&ArrayData],
len_in_bits: usize,
) -> Result<Option<Buffer>> {
- let left_offset_in_bits = left_data.offset();
- let right_offset_in_bits = right_data.offset();
-
- let left = left_data.null_buffer();
- let right = right_data.null_buffer();
-
- match left {
- None => match right {
- None => Ok(None),
- Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits,
len_in_bits))),
- },
- Some(l) => match right {
- None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
- Some(r) => Ok(Some(buffer_bin_and(
- l,
- left_offset_in_bits,
- r,
- right_offset_in_bits,
- len_in_bits,
- ))),
- },
+ if arrays.is_empty() {
+ return Err(ArrowError::ComputeError(
+ "Arrays must not be empty".to_string(),
+ ));
}
+
+ let mut buffers = arrays
+ .iter()
+ .map(|array| (array.null_buffer(), array.offset()));
Review Comment:
```suggestion
.map(|array| (array.null_buffer().cloned(), array.offset()));
```
Would allow removing it from the other locations. Again a very minor nit
##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
/// This function is useful when implementing operations on higher level
arrays.
#[allow(clippy::unnecessary_wraps)]
pub(super) fn combine_option_bitmap(
- left_data: &ArrayData,
- right_data: &ArrayData,
+ arrays: &[&ArrayData],
len_in_bits: usize,
) -> Result<Option<Buffer>> {
- let left_offset_in_bits = left_data.offset();
- let right_offset_in_bits = right_data.offset();
-
- let left = left_data.null_buffer();
- let right = right_data.null_buffer();
-
- match left {
- None => match right {
- None => Ok(None),
- Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits,
len_in_bits))),
- },
- Some(l) => match right {
- None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
- Some(r) => Ok(Some(buffer_bin_and(
- l,
- left_offset_in_bits,
- r,
- right_offset_in_bits,
- len_in_bits,
- ))),
- },
+ if arrays.is_empty() {
+ return Err(ArrowError::ComputeError(
+ "Arrays must not be empty".to_string(),
+ ));
}
+
+ let mut buffers = arrays
+ .iter()
+ .map(|array| (array.null_buffer(), array.offset()));
+
+ let init = buffers.next().unwrap();
Review Comment:
You could theoretically combine this with the check above, e.g. something
like
```
let init = buffers.next().ok_or_else(|| ArrowError::ComputeError(..))?;
```
But definitely not necessary
##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
/// This function is useful when implementing operations on higher level
arrays.
#[allow(clippy::unnecessary_wraps)]
pub(super) fn combine_option_bitmap(
- left_data: &ArrayData,
- right_data: &ArrayData,
+ arrays: &[&ArrayData],
len_in_bits: usize,
) -> Result<Option<Buffer>> {
- let left_offset_in_bits = left_data.offset();
- let right_offset_in_bits = right_data.offset();
-
- let left = left_data.null_buffer();
- let right = right_data.null_buffer();
-
- match left {
- None => match right {
- None => Ok(None),
- Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits,
len_in_bits))),
- },
- Some(l) => match right {
- None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
- Some(r) => Ok(Some(buffer_bin_and(
- l,
- left_offset_in_bits,
- r,
- right_offset_in_bits,
- len_in_bits,
- ))),
- },
+ if arrays.is_empty() {
+ return Err(ArrowError::ComputeError(
+ "Arrays must not be empty".to_string(),
+ ));
}
+
+ let mut buffers = arrays
+ .iter()
+ .map(|array| (array.null_buffer(), array.offset()));
+
+ let init = buffers.next().unwrap();
+ Ok(buffers
+ .fold((init.0.cloned(), init.1), |acc, buffer| {
Review Comment:
Having the offset here is clever :+1:
##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
/// This function is useful when implementing operations on higher level
arrays.
#[allow(clippy::unnecessary_wraps)]
pub(super) fn combine_option_bitmap(
- left_data: &ArrayData,
- right_data: &ArrayData,
+ arrays: &[&ArrayData],
len_in_bits: usize,
) -> Result<Option<Buffer>> {
- let left_offset_in_bits = left_data.offset();
- let right_offset_in_bits = right_data.offset();
-
- let left = left_data.null_buffer();
- let right = right_data.null_buffer();
-
- match left {
- None => match right {
- None => Ok(None),
- Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits,
len_in_bits))),
- },
- Some(l) => match right {
- None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
- Some(r) => Ok(Some(buffer_bin_and(
- l,
- left_offset_in_bits,
- r,
- right_offset_in_bits,
- len_in_bits,
- ))),
- },
+ if arrays.is_empty() {
+ return Err(ArrowError::ComputeError(
+ "Arrays must not be empty".to_string(),
+ ));
}
+
+ let mut buffers = arrays
+ .iter()
+ .map(|array| (array.null_buffer(), array.offset()));
+
+ let init = buffers.next().unwrap();
+ Ok(buffers
+ .fold((init.0.cloned(), init.1), |acc, buffer| {
+ match (acc, buffer) {
+ ((None, _), (None, _)) => (None, 0),
+ ((Some(buffer), offset), (None, _)) => (Some(buffer), offset),
+ ((None, _), (Some(buffer), offset)) => (Some(buffer.clone()),
offset),
+ (
+ (Some(buffer_left), offset_left),
+ (Some(buffer_right), offset_right),
+ ) => (
+ Some(buffer_bin_and(
+ &buffer_left,
+ offset_left,
+ buffer_right,
+ offset_right,
+ len_in_bits,
+ )),
+ 0,
+ ),
+ }
+ })
+ .0)
Review Comment:
I think there is a bug if there is only a single null buffer, and it has an
offset. In particular I think this will not perform the `bit_slice` operation
to "normalize" the bitmap. This appears to just discard the offset.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]