This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1044022d59 GH-48218: [C++][Parquet] Fix Util & Level Conversion logic 
on big-endian (#48219)
1044022d59 is described below

commit 1044022d596cc334eb5662959fbea0120590b623
Author: Vishwanatha-HD <[email protected]>
AuthorDate: Mon Nov 24 17:11:49 2025 +0530

    GH-48218: [C++][Parquet] Fix Util & Level Conversion logic on big-endian 
(#48219)
    
    ### Rationale for this change
    
    This PR is intended to enable Parquet DB support on big-endian (s390x) 
systems. It fixes the big-endian handling in the "util & level_conversion" logic.
    
    ### What changes are included in this PR?
    
    The fix includes changes to the following files:
    cpp/src/parquet/geospatial/util_internal.cc
    cpp/src/parquet/level_conversion_inc.h
    cpp/src/parquet/test_util.h
    
    ### Are these changes tested?
    
    Yes. The changes were tested on the s390x architecture to make sure they 
work correctly. The fix was also tested on the x86 architecture to make sure 
no new regressions were introduced.
    
    ### Are there any user-facing changes?
    
    No.
    
    GitHub main Issue link: https://github.com/apache/arrow/issues/48151
    * GitHub Issue: #48218
    
    Authored-by: Vishwanatha-HD <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/geospatial/util_internal.cc | 2 +-
 cpp/src/parquet/level_conversion_inc.h      | 9 +++++----
 cpp/src/parquet/test_util.h                 | 6 ++++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/cpp/src/parquet/geospatial/util_internal.cc 
b/cpp/src/parquet/geospatial/util_internal.cc
index 4991d58a13..d5c8d66288 100644
--- a/cpp/src/parquet/geospatial/util_internal.cc
+++ b/cpp/src/parquet/geospatial/util_internal.cc
@@ -162,7 +162,7 @@ void 
WKBGeometryBounder::MergeGeometry(::arrow::util::span<const uint8_t> bytes_
 
 void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool 
record_wkb_type) {
   uint8_t endian = src->ReadUInt8();
-#if defined(ARROW_LITTLE_ENDIAN)
+#if ARROW_LITTLE_ENDIAN
   bool swap = endian != 0x01;
 #else
   bool swap = endian != 0x00;
diff --git a/cpp/src/parquet/level_conversion_inc.h 
b/cpp/src/parquet/level_conversion_inc.h
index 5fce93e779..335f5b9215 100644
--- a/cpp/src/parquet/level_conversion_inc.h
+++ b/cpp/src/parquet/level_conversion_inc.h
@@ -299,14 +299,15 @@ int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, 
const int64_t batch_si
   ARROW_DCHECK_LE(batch_size, kExtractBitsSize);
 
   // Greater than level_info.def_level - 1 implies >= the def_level
-  auto defined_bitmap = static_cast<extract_bitmap_t>(
-      internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level 
- 1));
+  auto defined_bitmap = 
static_cast<extract_bitmap_t>(::arrow::bit_util::FromLittleEndian(
+      internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level 
- 1)));
 
   if (has_repeated_parent) {
     // Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
     // repeated_ancestor_def_level
-    auto present_bitmap = 
static_cast<extract_bitmap_t>(internal::GreaterThanBitmap(
-        def_levels, batch_size, level_info.repeated_ancestor_def_level - 1));
+    auto present_bitmap = static_cast<extract_bitmap_t>(
+        ::arrow::bit_util::FromLittleEndian(internal::GreaterThanBitmap(
+            def_levels, batch_size, level_info.repeated_ancestor_def_level - 
1)));
     auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
     int64_t selected_count = ::arrow::bit_util::PopCount(present_bitmap);
     if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {
diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h
index 3ed9a1a007..9271dc290c 100644
--- a/cpp/src/parquet/test_util.h
+++ b/cpp/src/parquet/test_util.h
@@ -33,6 +33,7 @@
 #include "arrow/extension_type.h"
 #include "arrow/io/memory.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/endian.h"
 #include "arrow/util/float16.h"
 
 #include "parquet/column_page.h"
@@ -319,8 +320,9 @@ class DataPageBuilder {
     encoder.Encode(static_cast<int>(levels.size()), levels.data());
 
     int32_t rle_bytes = encoder.len();
+    int32_t rle_bytes_le = ::arrow::bit_util::ToLittleEndian(rle_bytes);
     PARQUET_THROW_NOT_OK(
-        sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes), 
sizeof(int32_t)));
+        sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes_le), 
sizeof(int32_t)));
     PARQUET_THROW_NOT_OK(sink_->Write(encode_buffer.data(), rle_bytes));
   }
 };
@@ -835,7 +837,7 @@ inline void GenerateData<FLBA>(int num_values, FLBA* out, 
std::vector<uint8_t>*
 // ----------------------------------------------------------------------
 // Test utility functions for geometry
 
-#if defined(ARROW_LITTLE_ENDIAN)
+#if ARROW_LITTLE_ENDIAN
 static constexpr uint8_t kWkbNativeEndianness = 0x01;
 #else
 static constexpr uint8_t kWkbNativeEndianness = 0x00;

Reply via email to