felipecrv commented on code in PR #35814:
URL: https://github.com/apache/arrow/pull/35814#discussion_r1210359248
##########
cpp/src/arrow/scalar.cc:
##########
@@ -151,20 +152,53 @@ struct ScalarHashImpl {
Status ArrayHash(const Array& a) { return ArrayHash(*a.data()); }
- Status ArrayHash(const ArrayData& a) {
- RETURN_NOT_OK(StdHash(a.length) & StdHash(a.GetNullCount()));
- if (a.GetNullCount() != 0 && a.buffers[0] != nullptr) {
+ Status ArrayHash(const ArraySpan& a, int64_t offset, int64_t length) {
+ // Calculate null count within the range
+ const auto* validity = a.buffers[0].data;
+ const int64_t validity_size = a.buffers[0].size;
+ int64_t null_count = 0;
+ if (validity != NULLPTR) {
+ if (offset == a.offset && length == a.length) {
+ null_count = a.GetNullCount();
+ } else {
+ null_count = length - internal::CountSetBits(validity, offset, length);
+ }
+ }
+
+ RETURN_NOT_OK(StdHash(length) & StdHash(null_count));
+ if (null_count != 0) {
// We can't visit values without unboxing the whole array, so only hash
// the null bitmap for now. Only hash the null bitmap if the null count
// is not 0 to ensure hash consistency.
- RETURN_NOT_OK(BufferHash(*a.buffers[0]));
+ hash_ = internal::ComputeBitmapHash(validity, validity_size,
/*seed=*/hash_,
+ /*bits_offset=*/offset,
/*num_bits=*/length);
}
- for (const auto& child : a.child_data) {
- RETURN_NOT_OK(ArrayHash(*child));
+
+ // Hash the relevant child arrays for each type taking offset and length
+ // from the parent array into account if necessary.
+ switch (a.type->id()) {
+ case Type::RUN_END_ENCODED:
Review Comment:
No. I will add it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]