This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 91f66187db GH-48238: [C++] Actually write IPC schema endianness, not host endianness (#48239)
91f66187db is described below

commit 91f66187db250d844f4f62d454a8e21cdf10eb75
Author: Antoine Pitrou <[email protected]>
AuthorDate: Sat Nov 29 15:22:20 2025 +0100

    GH-48238: [C++] Actually write IPC schema endianness, not host endianness (#48239)
    
    ### Rationale for this change
    
    `Schema` objects have an endianness, but we were ignoring it when serializing a `Schema` to IPC, and instead writing out the host's endianness.
    
    ### Are these changes tested?
    
    Yes, by an additional test.
    
    ### Are there any user-facing changes?
    
    No.
    
    **This PR contains a "Critical Fix".** (If the changes fix either (a) a security vulnerability, (b) a bug that caused incorrect or invalid data to be produced, or (c) a bug that causes a crash (even when the API contract is upheld), please provide explanation. If not, you can remove this.)
    
    * GitHub Issue: #48238
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/ipc/message_internal_test.cc | 28 ++++++++++++++++++++++++++--
 cpp/src/arrow/ipc/metadata_internal.cc     | 17 ++++-------------
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/ipc/message_internal_test.cc 
b/cpp/src/arrow/ipc/message_internal_test.cc
index 112240f08d..1114f55517 100644
--- a/cpp/src/arrow/ipc/message_internal_test.cc
+++ b/cpp/src/arrow/ipc/message_internal_test.cc
@@ -23,6 +23,7 @@
 #include "arrow/ipc/dictionary.h"
 #include "arrow/ipc/metadata_internal.h"
 #include "arrow/ipc/options.h"
+#include "arrow/ipc/reader.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/key_value_metadata.h"
 
@@ -38,8 +39,6 @@ using FBB = flatbuffers::FlatBufferBuilder;
 // lead to unnecessary platform- or toolchain-specific differences in
 // serialization.
 TEST(TestMessageInternal, TestByteIdentical) {
-  FBB fbb;
-  flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> fb_schema;
   DictionaryFieldMapper mapper;
 
   // Create a simple Schema with just two metadata KVPs
@@ -78,4 +77,29 @@ TEST(TestMessageInternal, TestByteIdentical) {
 
   AssertBufferEqual(expected_buffer, *out_buffer);
 }
+
+TEST(TestMessageInternal, TestEndiannessRoundtrip) {
+  DictionaryFieldMapper mapper;
+
+  for (const auto endianness : {Endianness::Little, Endianness::Big}) {
+    // Create a simple Schema with just two metadata KVPs
+    auto f0 = field("f0", int64());
+    auto f1 = field("f1", int64());
+    std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+    std::shared_ptr<KeyValueMetadata> metadata =
+        KeyValueMetadata::Make({"key_1", "key_2"}, {"key_1_value", 
"key_2_value"});
+    auto schema = ::arrow::schema({f0}, endianness, metadata);
+
+    // Serialize the Schema to a Buffer
+    std::shared_ptr<Buffer> out_buffer;
+    ASSERT_OK(
+        WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), 
&out_buffer));
+
+    // Re-open to a new Message and parse Schema
+    ASSERT_OK_AND_ASSIGN(auto message, Message::Open(out_buffer, 
/*body=*/nullptr));
+    ASSERT_OK_AND_ASSIGN(auto parsed_schema, ReadSchema(*message, nullptr));
+    AssertSchemaEqual(*schema, *parsed_schema, /*check_metadata=*/true);
+  }
+}
+
 }  // namespace arrow::ipc::internal
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc 
b/cpp/src/arrow/ipc/metadata_internal.cc
index e427b42d2d..65a4fcee7a 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -933,18 +933,6 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, 
FieldPosition field_pos,
   return Status::OK();
 }
 
-// will return the endianness of the system we are running on
-// based the NUMPY_API function. See NOTICE.txt
-flatbuf::Endianness endianness() {
-  union {
-    uint32_t i;
-    char c[4];
-  } bint = {0x01020304};
-
-  return bint.c[0] == 1 ? flatbuf::Endianness::Endianness_Big
-                        : flatbuf::Endianness::Endianness_Little;
-}
-
 flatbuffers::Offset<KVVector> SerializeCustomMetadata(
     FBB& fbb, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   std::vector<KeyValueOffset> key_values;
@@ -970,7 +958,10 @@ Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
   }
 
   auto fb_offsets = fbb.CreateVector(field_offsets);
-  *out = flatbuf::CreateSchema(fbb, endianness(), fb_offsets,
+  auto fb_endianness = schema.endianness() == Endianness::Little
+                           ? flatbuf::Endianness::Endianness_Little
+                           : flatbuf::Endianness::Endianness_Big;
+  *out = flatbuf::CreateSchema(fbb, fb_endianness, fb_offsets,
                                SerializeCustomMetadata(fbb, 
schema.metadata()));
   return Status::OK();
 }

Reply via email to