nevi-me commented on a change in pull request #8739:
URL: https://github.com/apache/arrow/pull/8739#discussion_r531781486
##########
File path: rust/arrow/src/array/equal/structure.rs
##########
@@ -15,45 +15,97 @@
// specific language governing permissions and limitations
// under the License.
-use crate::array::ArrayData;
+use crate::{
+ array::data::count_nulls, array::ArrayData, buffer::Buffer,
util::bit_util::get_bit,
+};
use super::equal_range;
+/// Compares the values of two [ArrayData] starting at `lhs_start` and
`rhs_start` respectively
+/// for `len` slots. The null buffers `lhs_nulls` and `rhs_nulls` inherit
parent nullability.
+///
+/// If an array is a child of a struct or list, the array's nulls have to be
merged with the parent.
+/// This then affects the null count of the array, thus the merged nulls are
passed separately
+/// as `lhs_nulls` and `rhs_nulls` variables to functions.
+/// The nulls are merged with a bitwise AND, and null counts are recomputed
wheer necessary.
fn equal_values(
lhs: &ArrayData,
rhs: &ArrayData,
+ lhs_nulls: Option<&Buffer>,
+ rhs_nulls: Option<&Buffer>,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
+ let mut temp_lhs: Option<Buffer> = None;
+ let mut temp_rhs: Option<Buffer> = None;
+
lhs.child_data()
.iter()
.zip(rhs.child_data())
.all(|(lhs_values, rhs_values)| {
- equal_range(lhs_values, rhs_values, lhs_start, rhs_start, len)
+ // merge the null data
+ let lhs_merged_nulls = match (lhs_nulls, lhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c),
+ (Some(p), None) => Some(p),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ temp_lhs = Some(merged);
+ temp_lhs.as_ref()
+ }
+ };
+ let rhs_merged_nulls = match (rhs_nulls, rhs_values.null_buffer())
{
+ (None, None) => None,
+ (None, Some(c)) => Some(c),
+ (Some(p), None) => Some(p),
+ (Some(p), Some(c)) => {
+ let merged = (p & c).unwrap();
+ temp_rhs = Some(merged);
+ temp_rhs.as_ref()
+ }
+ };
+ equal_range(
+ lhs_values,
+ rhs_values,
+ lhs_merged_nulls,
+ rhs_merged_nulls,
+ lhs_start,
+ rhs_start,
+ len,
+ )
})
}
pub(super) fn struct_equal(
lhs: &ArrayData,
rhs: &ArrayData,
+ lhs_nulls: Option<&Buffer>,
+ rhs_nulls: Option<&Buffer>,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
- if lhs.null_count() == 0 && rhs.null_count() == 0 {
- equal_values(lhs, rhs, lhs_start, rhs_start, len)
+ // we have to recalculate null counts from the null buffers
+ let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
+ let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
+ if lhs_null_count == 0 && rhs_null_count == 0 {
+ equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
} else {
+ // get a ref of the null buffer bytes, to use in testing for nullness
Review comment:
@jorgecarleitao does this address your comment? I was incorrectly using
the array's null buffer, instead of checking nullness from the merged one. If
it doesn't, then it means I didn't understand what you were suggesting.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]