jorgecarleitao commented on a change in pull request #8739:
URL: https://github.com/apache/arrow/pull/8739#discussion_r528333746
##########
File path: rust/arrow/src/array/equal/structure.rs
##########
@@ -30,19 +32,51 @@ fn equal_values(
.iter()
.zip(rhs.child_data())
.all(|(lhs_values, rhs_values)| {
- equal_range(lhs_values, rhs_values, lhs_start, rhs_start, len)
+ // merge the null data
+ let lhs_merged_nulls = match (lhs_nulls, lhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c.clone()),
+ (Some(p), None) => Some(p.clone()),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ Some(merged)
+ }
+ };
+ let rhs_merged_nulls = match (rhs_nulls, rhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c.clone()),
+ (Some(p), None) => Some(p.clone()),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ Some(merged)
+ }
+ };
+ equal_range(
+ lhs_values,
+ rhs_values,
+ lhs_merged_nulls.as_ref(),
+ rhs_merged_nulls.as_ref(),
+ lhs_start,
+ rhs_start,
+ len,
+ )
})
}
pub(super) fn struct_equal(
lhs: &ArrayData,
rhs: &ArrayData,
+ lhs_nulls: Option<&Buffer>,
+ rhs_nulls: Option<&Buffer>,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
- if lhs.null_count() == 0 && rhs.null_count() == 0 {
- equal_values(lhs, rhs, lhs_start, rhs_start, len)
+ // we have to recalculate null counts from the null bitmaps
+ let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
+ let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
+ if lhs_null_count == 0 && rhs_null_count == 0 {
+ equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
} else {
// with nulls, we need to compare item by item whenever it is not null
(0..len).all(|i| {
Review comment:
Wouldn't it be easier if we took into account the nulls on this
statement here:
```rust
let lhs_is_null = lhs.is_null(lhs_pos);
let rhs_is_null = rhs.is_null(rhs_pos);
```
by replacing it with
```rust
let lhs_is_null = lhs.is_null(lhs_pos) && lhs.nullbitmap.isnull(lhs_pos);
let rhs_is_null = rhs.is_null(rhs_pos) && rhs.nullbitmap.isnull(rhs_pos);
```
(still keeping the re-count in place, to avoid wrongfully optimizing like
this PR fixes)
The advantage is that for all types except `struct`, this PR merges two
equal bitmaps twice (all types except `struct` now pass `null_buffer()` to
`equal_range`. E.g. in `list`:
```
lhs_values,
rhs_values,
lhs_values.null_buffer(),
rhs_values.null_buffer(),
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]