Vishwanatha-HD commented on code in PR #48180:
URL: https://github.com/apache/arrow/pull/48180#discussion_r2591499936


##########
cpp/src/arrow/compute/util.cc:
##########
@@ -299,7 +313,17 @@ void bytes_to_bits(int64_t hardware_flags, const int 
num_bits, const uint8_t* by
   }
   int tail = num_bits % unroll;
   if (tail) {
-    uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+    uint64_t bytes_next;
+#if ARROW_LITTLE_ENDIAN
+    bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+#else
+    // On Big-endian systems, for bytes_to_bits, load all tail bytes in 
little-endian
+    // order to ensure compatibility with subsequent bit operations
+    bytes_next = 0;
+    for (int i = 0; i < tail; ++i) {
+      bytes_next |= static_cast<uint64_t>((bytes + num_bits - tail)[i]) << (8 
* i);
+    }
+#endif

Review Comment:
   @kou.. The SafeLoadUpTo8Bytes() function remains unchanged.. 
   ```
   inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
     ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
     if (num_bytes == 8) {
       return util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
     } else {
       uint64_t word = 0;
       for (int i = 0; i < num_bytes; ++i) {
         word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
       }
       return word;
     }
   }
   ```
   
   The bytes_to_bits() function is handling the endianness fix independently.. 
If I do the endianness conversion inside SafeLoadUpTo8Bytes() function, rather 
than here, then the testcase is not working.. 
    ```
    if (tail) {
       uint64_t bytes_next;
   #if ARROW_LITTLE_ENDIAN
       bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
   #else
       // On Big-endian systems, for bytes_to_bits, load all tail bytes in 
little-endian
       // order to ensure compatibility with subsequent bit operations
       bytes_next = 0;
       for (int i = 0; i < tail; ++i) {
         bytes_next |= static_cast<uint64_t>((bytes + num_bits - tail)[i]) << 
(8 * i);
       }
   #endif
       bytes_next &= 0x0101010101010101ULL;
       bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in 
individual bytes
       bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in 
individual bytes
       bytes_next |= (bytes_next >> 28);  // All 8 output bits in the lowest 
byte
       bits[num_bits / 8] = static_cast<uint8_t>(bytes_next & 0xff);
     }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to