This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 91f66187db GH-48238: [C++] Actually write IPC schema endianness, not
host endianness (#48239)
91f66187db is described below
commit 91f66187db250d844f4f62d454a8e21cdf10eb75
Author: Antoine Pitrou <[email protected]>
AuthorDate: Sat Nov 29 15:22:20 2025 +0100
GH-48238: [C++] Actually write IPC schema endianness, not host endianness
(#48239)
### Rationale for this change
`Schema` objects have an endianness, but we were ignoring it when
serializing a `Schema` to IPC, and instead writing out the host's endianness.
### Are these changes tested?
Yes, by an additional test (`TestEndiannessRoundtrip`).
### Are there any user-facing changes?
No.
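**This PR contains a "Critical Fix".** (If the changes fix either (a) a
security vulnerability, (b) a bug that caused incorrect or invalid data to be
produced, or (c) a bug that causes a crash (even when the API contract is
upheld), please provide explanation. If not, you can remove this.)
Explanation: this falls under (b). A `Schema` whose endianness differed from
the host's was serialized to IPC with the host's endianness instead of its own.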
* GitHub Issue: #48238
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/ipc/message_internal_test.cc | 28 ++++++++++++++++++++++++++--
cpp/src/arrow/ipc/metadata_internal.cc | 17 ++++-------------
2 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/cpp/src/arrow/ipc/message_internal_test.cc
b/cpp/src/arrow/ipc/message_internal_test.cc
index 112240f08d..1114f55517 100644
--- a/cpp/src/arrow/ipc/message_internal_test.cc
+++ b/cpp/src/arrow/ipc/message_internal_test.cc
@@ -23,6 +23,7 @@
#include "arrow/ipc/dictionary.h"
#include "arrow/ipc/metadata_internal.h"
#include "arrow/ipc/options.h"
+#include "arrow/ipc/reader.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/key_value_metadata.h"
@@ -38,8 +39,6 @@ using FBB = flatbuffers::FlatBufferBuilder;
// lead to unnecessary platform- or toolchain-specific differences in
// serialization.
TEST(TestMessageInternal, TestByteIdentical) {
- FBB fbb;
- flatbuffers::Offset<org::apache::arrow::flatbuf::Schema> fb_schema;
DictionaryFieldMapper mapper;
// Create a simple Schema with just two metadata KVPs
@@ -78,4 +77,29 @@ TEST(TestMessageInternal, TestByteIdentical) {
AssertBufferEqual(expected_buffer, *out_buffer);
}
+
+TEST(TestMessageInternal, TestEndiannessRoundtrip) {
+ DictionaryFieldMapper mapper;
+
+ for (const auto endianness : {Endianness::Little, Endianness::Big}) {
+ // Create a simple Schema with just two metadata KVPs
+ auto f0 = field("f0", int64());
+ auto f1 = field("f1", int64());
+ std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+ std::shared_ptr<KeyValueMetadata> metadata =
+ KeyValueMetadata::Make({"key_1", "key_2"}, {"key_1_value",
"key_2_value"});
+ auto schema = ::arrow::schema({f0}, endianness, metadata);
+
+ // Serialize the Schema to a Buffer
+ std::shared_ptr<Buffer> out_buffer;
+ ASSERT_OK(
+ WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(),
&out_buffer));
+
+ // Re-open to a new Message and parse Schema
+ ASSERT_OK_AND_ASSIGN(auto message, Message::Open(out_buffer,
/*body=*/nullptr));
+ ASSERT_OK_AND_ASSIGN(auto parsed_schema, ReadSchema(*message, nullptr));
+ AssertSchemaEqual(*schema, *parsed_schema, /*check_metadata=*/true);
+ }
+}
+
} // namespace arrow::ipc::internal
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc
b/cpp/src/arrow/ipc/metadata_internal.cc
index e427b42d2d..65a4fcee7a 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -933,18 +933,6 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field,
FieldPosition field_pos,
return Status::OK();
}
-// will return the endianness of the system we are running on
-// based the NUMPY_API function. See NOTICE.txt
-flatbuf::Endianness endianness() {
- union {
- uint32_t i;
- char c[4];
- } bint = {0x01020304};
-
- return bint.c[0] == 1 ? flatbuf::Endianness::Endianness_Big
- : flatbuf::Endianness::Endianness_Little;
-}
-
flatbuffers::Offset<KVVector> SerializeCustomMetadata(
FBB& fbb, const std::shared_ptr<const KeyValueMetadata>& metadata) {
std::vector<KeyValueOffset> key_values;
@@ -970,7 +958,10 @@ Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
}
auto fb_offsets = fbb.CreateVector(field_offsets);
- *out = flatbuf::CreateSchema(fbb, endianness(), fb_offsets,
+ auto fb_endianness = schema.endianness() == Endianness::Little
+ ? flatbuf::Endianness::Endianness_Little
+ : flatbuf::Endianness::Endianness_Big;
+ *out = flatbuf::CreateSchema(fbb, fb_endianness, fb_offsets,
SerializeCustomMetadata(fbb,
schema.metadata()));
return Status::OK();
}