kou commented on code in PR #48180:
URL: https://github.com/apache/arrow/pull/48180#discussion_r2558343983


##########
cpp/src/arrow/compute/util.cc:
##########
@@ -47,17 +43,20 @@ inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, 
int num_bytes) {
 }
 
 inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) 
{
-  // This will not be correct on big-endian architectures.
-#if !ARROW_LITTLE_ENDIAN
-  ARROW_DCHECK(false);
-#endif
   ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
   if (num_bytes == 8) {
     util::SafeStore(reinterpret_cast<uint64_t*>(bytes), value);

Review Comment:
   If we have apply a change in 
https://github.com/apache/arrow/pull/48180#pullrequestreview-3496008897 for 
this part, `SafeStoreUpTo8Bytes()` works with `num_bytes = 8` on big endian?



##########
cpp/src/arrow/compute/util.cc:
##########
@@ -118,7 +117,22 @@ void bits_to_indexes_internal(int64_t hardware_flags, 
const int num_bits,
   // Optionally process the last partial word with masking out bits outside 
range
   if (tail) {
     const uint8_t* bits_tail = bits + (num_bits - tail) / 8;
+#if ARROW_LITTLE_ENDIAN
     uint64_t word = SafeLoadUpTo8Bytes(bits_tail, (tail + 7) / 8);
+#else
+    int tail_bytes = (tail + 7) / 8;
+    uint64_t word;
+    if (tail_bytes == 8) {
+      word = util::SafeLoad(reinterpret_cast<const uint64_t*>(bits_tail));
+    } else {
+      // For bit manipulation, always load into least significant bits
+      // to ensure compatibility with CountTrailingZeros on Big-endian systems
+      word = 0;
+      for (int i = 0; i < tail_bytes; ++i) {
+        word |= static_cast<uint64_t>(bits_tail[i]) << (8 * i);
+      }
+    }
+#endif

Review Comment:
   Can we revert this change with the latest `SafeLoadUpTo8Bytes()` (that has 
big endian support)?



##########
cpp/src/arrow/compute/util.cc:
##########
@@ -30,10 +30,6 @@ namespace util {
 namespace bit_util {
 
 inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
-  // This will not be correct on big-endian architectures.
-#if !ARROW_LITTLE_ENDIAN
-  ARROW_DCHECK(false);
-#endif

Review Comment:
   I think that we need a change in 
https://github.com/apache/arrow/pull/48180#pullrequestreview-3496008897 , to 
remove this check.



##########
cpp/src/arrow/compute/util.cc:
##########
@@ -299,7 +313,17 @@ void bytes_to_bits(int64_t hardware_flags, const int 
num_bits, const uint8_t* by
   }
   int tail = num_bits % unroll;
   if (tail) {
-    uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+    uint64_t bytes_next;
+#if ARROW_LITTLE_ENDIAN
+    bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+#else
+    // On Big-endian systems, for bytes_to_bits, load all tail bytes in 
little-endian
+    // order to ensure compatibility with subsequent bit operations
+    bytes_next = 0;
+    for (int i = 0; i < tail; ++i) {
+      bytes_next |= static_cast<uint64_t>((bytes + num_bits - tail)[i]) << (8 
* i);
+    }
+#endif

Review Comment:
   Can we revert this change with the latest `SafeLoadUpTo8Bytes()` (that has 
big endian support)?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to