kou commented on code in PR #48207:
URL: https://github.com/apache/arrow/pull/48207#discussion_r2553142968


##########
cpp/src/parquet/statistics.cc:
##########
@@ -925,22 +926,94 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* 
values, const uint8_t* va
 
 template <typename DType>
 void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) 
const {
+#if ARROW_LITTLE_ENDIAN
   auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, 
pool_);
   encoder->Put(&src, 1);
   auto buffer = encoder->FlushValues();
   auto ptr = reinterpret_cast<const char*>(buffer->data());
   dst->assign(ptr, static_cast<size_t>(buffer->size()));
+#else
+  // For fixed-width numeric types, write explicit little-endian bytes per spec
+  if constexpr (std::is_same_v<DType, Int32Type>) {
+    uint32_t u;
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, Int64Type>) {
+    uint64_t u;
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, FloatType>) {
+    uint32_t u;
+    static_assert(sizeof(u) == sizeof(float), "size");
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, DoubleType>) {
+    uint64_t u;
+    static_assert(sizeof(u) == sizeof(double), "size");
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  }
+  // Fallback: use encoder for other types
+  auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, 
pool_);
+  encoder->Put(&src, 1);
+  auto buffer = encoder->FlushValues();
+  dst->assign(reinterpret_cast<const char*>(buffer->data()),
+              static_cast<size_t>(buffer->size()));

Review Comment:
   Can we reuse the existing `ARROW_LITTLE_ENDIAN` implementation for this fallback instead of duplicating it?
   
   ```cpp
   #if !ARROW_LITTLE_ENDIAN
     if constexpr (...) {
       ...
     } else if ... {
       ...
     }
   #endif
     auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
     encoder->Put(&src, 1);
     auto buffer = encoder->FlushValues();
     auto ptr = reinterpret_cast<const char*>(buffer->data());
     dst->assign(ptr, static_cast<size_t>(buffer->size()));
   ```



##########
cpp/src/parquet/statistics.cc:
##########
@@ -925,22 +926,94 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* 
values, const uint8_t* va
 
 template <typename DType>
 void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) 
const {
+#if ARROW_LITTLE_ENDIAN
   auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, 
pool_);
   encoder->Put(&src, 1);
   auto buffer = encoder->FlushValues();
   auto ptr = reinterpret_cast<const char*>(buffer->data());
   dst->assign(ptr, static_cast<size_t>(buffer->size()));
+#else
+  // For fixed-width numeric types, write explicit little-endian bytes per spec
+  if constexpr (std::is_same_v<DType, Int32Type>) {
+    uint32_t u;
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, Int64Type>) {
+    uint64_t u;
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, FloatType>) {
+    uint32_t u;
+    static_assert(sizeof(u) == sizeof(float), "size");
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  } else if constexpr (std::is_same_v<DType, DoubleType>) {
+    uint64_t u;
+    static_assert(sizeof(u) == sizeof(double), "size");
+    std::memcpy(&u, &src, sizeof(u));
+    u = ::arrow::bit_util::ToLittleEndian(u);
+    dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
+    return;
+  }

Review Comment:
   Can we do this in `XXXEncoder::Put()` instead of here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to