alamb commented on a change in pull request #8739:
URL: https://github.com/apache/arrow/pull/8739#discussion_r529026844
##########
File path: rust/arrow/src/array/equal/structure.rs
##########
@@ -15,13 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-use crate::array::ArrayData;
+use crate::{array::data::count_nulls, array::ArrayData, buffer::Buffer};
use super::equal_range;
fn equal_values(
Review comment:
Given the number of parameters this function has, it might be worth
adding doc comments on it (especially as `lhs_nulls` and `rhs_nulls` now have
non trivial semantics (nullability is checked by `AND`ing them together)
##########
File path: rust/arrow/src/array/equal/structure.rs
##########
@@ -30,19 +32,51 @@ fn equal_values(
.iter()
.zip(rhs.child_data())
.all(|(lhs_values, rhs_values)| {
- equal_range(lhs_values, rhs_values, lhs_start, rhs_start, len)
+ // merge the null data
+ let lhs_merged_nulls = match (lhs_nulls, lhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c.clone()),
+ (Some(p), None) => Some(p.clone()),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ Some(merged)
+ }
+ };
+ let rhs_merged_nulls = match (rhs_nulls, rhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c.clone()),
+ (Some(p), None) => Some(p.clone()),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ Some(merged)
+ }
+ };
+ equal_range(
+ lhs_values,
+ rhs_values,
+ lhs_merged_nulls.as_ref(),
+ rhs_merged_nulls.as_ref(),
+ lhs_start,
+ rhs_start,
+ len,
+ )
})
}
pub(super) fn struct_equal(
lhs: &ArrayData,
rhs: &ArrayData,
+ lhs_nulls: Option<&Buffer>,
+ rhs_nulls: Option<&Buffer>,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
- if lhs.null_count() == 0 && rhs.null_count() == 0 {
- equal_values(lhs, rhs, lhs_start, rhs_start, len)
+ // we have to recalculate null counts from the null bitmaps
+ let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
+ let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
+ if lhs_null_count == 0 && rhs_null_count == 0 {
+ equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
} else {
// with nulls, we need to compare item by item whenever it is not null
(0..len).all(|i| {
Review comment:
An alternate thing to do is to pass in `None` for all types except
`struct`
https://github.com/apache/arrow/pull/8739/files#diff-6fca2b5d7c8b47ba17e7c93c2eaa2cf7f6b65392d0573254fee593449b8adf46R43
##########
File path: rust/arrow/src/array/equal/structure.rs
##########
@@ -30,19 +32,51 @@ fn equal_values(
.iter()
.zip(rhs.child_data())
.all(|(lhs_values, rhs_values)| {
- equal_range(lhs_values, rhs_values, lhs_start, rhs_start, len)
+ // merge the null data
+ let lhs_merged_nulls = match (lhs_nulls, lhs_values.null_buffer())
{
Review comment:
I don't know how much `c.clone()` costs below, but if we want to avoid
`clone()` you could potentially use a structure like this:
```
alamb@MacBook-Pro rust % git diff
git diff
WARNING: terminal is not fully functional
- (press RETURN)
diff --git a/rust/arrow/src/array/equal/structure.rs
b/rust/arrow/src/array/equal/structure.rs
index cad7a9ea5..dc29d44ba 100644
--- a/rust/arrow/src/array/equal/structure.rs
+++ b/rust/arrow/src/array/equal/structure.rs
@@ -28,6 +28,9 @@ fn equal_values(
rhs_start: usize,
len: usize,
) -> bool {
+ let mut temp_lhs : Option<Buffer> = None;
+ let mut temp_rhs : Option<Buffer> = None;
+
lhs.child_data()
.iter()
.zip(rhs.child_data())
@@ -35,27 +38,29 @@ fn equal_values(
// merge the null data
let lhs_merged_nulls = match (lhs_nulls,
lhs_values.null_buffer()) {
(None, None) => None,
- (None, Some(c)) => Some(c.clone()),
- (Some(p), None) => Some(p.clone()),
+ (None, Some(c)) => Some(c),
+ (Some(p), None) => Some(p),
(Some(p), Some(c)) => {
let merged = (p & c).unwrap();
- Some(merged)
+ temp_lhs = Some(merged);
+ temp_lhs.as_ref()
}
};
let rhs_merged_nulls = match (rhs_nulls,
rhs_values.null_buffer()) {
(None, None) => None,
- (None, Some(c)) => Some(c.clone()),
- (Some(p), None) => Some(p.clone()),
+ (None, Some(c)) => Some(c),
+ (Some(p), None) => Some(p),
(Some(p), Some(c)) => {
let merged = (p & c).unwrap();
- Some(merged)
+ temp_rhs = Some(merged);
+ temp_rhs.as_ref()
}
};
equal_range(
lhs_values,
rhs_values,
- lhs_merged_nulls.as_ref(),
- rhs_merged_nulls.as_ref(),
+ lhs_merged_nulls,
+ rhs_merged_nulls,
lhs_start,
rhs_start,
len,
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]