Vishwanatha-HD commented on code in PR #48180:
URL: https://github.com/apache/arrow/pull/48180#discussion_r2591499936
##########
cpp/src/arrow/compute/util.cc:
##########
@@ -299,7 +313,17 @@ void bytes_to_bits(int64_t hardware_flags, const int
num_bits, const uint8_t* by
}
int tail = num_bits % unroll;
if (tail) {
- uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+ uint64_t bytes_next;
+#if ARROW_LITTLE_ENDIAN
+ bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+#else
+ // On Big-endian systems, for bytes_to_bits, load all tail bytes in
little-endian
+ // order to ensure compatibility with subsequent bit operations
+ bytes_next = 0;
+ for (int i = 0; i < tail; ++i) {
+ bytes_next |= static_cast<uint64_t>((bytes + num_bits - tail)[i]) << (8
* i);
+ }
+#endif
Review Comment:
@kou.. The SafeLoadUpTo8Bytes() function remains unchanged..
```
inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
if (num_bytes == 8) {
return util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
} else {
uint64_t word = 0;
for (int i = 0; i < num_bytes; ++i) {
word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
}
return word;
}
}
```
The bytes_to_bits() function is handling the endianness fix independently..
If I do the endianness conversion inside SafeLoadUpTo8Bytes() function, rather
than here, then the testcase is not working..
```
if (tail) {
uint64_t bytes_next;
#if ARROW_LITTLE_ENDIAN
bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
#else
// On Big-endian systems, for bytes_to_bits, load all tail bytes in
little-endian
// order to ensure compatibility with subsequent bit operations
bytes_next = 0;
for (int i = 0; i < tail; ++i) {
bytes_next |= static_cast<uint64_t>((bytes + num_bits - tail)[i]) <<
(8 * i);
}
#endif
bytes_next &= 0x0101010101010101ULL;
bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in
individual bytes
bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in
individual bytes
bytes_next |= (bytes_next >> 28); // All 8 output bits in the lowest
byte
bits[num_bits / 8] = static_cast<uint8_t>(bytes_next & 0xff);
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]