This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8345991ad6 Result into error in case of endianness mismatches (#5301)
8345991ad6 is described below
commit 8345991ad662d63feed35356171842c27fb36783
Author: Paolo Angioletti <[email protected]>
AuthorDate: Mon Jan 15 14:27:25 2024 +0000
Result into error in case of endianness mismatches (#5301)
This commit implements the Byte Order (Endianness) recommendations we
could read from the Apache Arrow official specification (quoted here):
> _"At first we will return an error when trying to read a Schema
> with an endianness that does not match the underlying system."_
Resolves: #3459
---
arrow-integration-testing/tests/ipc_reader.rs | 31 +++++++--------------------
arrow-ipc/src/gen/Schema.rs | 9 ++++++++
arrow-ipc/src/reader.rs | 6 ++++++
3 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/arrow-integration-testing/tests/ipc_reader.rs b/arrow-integration-testing/tests/ipc_reader.rs
index 11b8fa8453..88cdad64f9 100644
--- a/arrow-integration-testing/tests/ipc_reader.rs
+++ b/arrow-integration-testing/tests/ipc_reader.rs
@@ -18,6 +18,7 @@
//! Tests for reading the content of [`FileReader`] and [`StreamReader`]
//! in `testing/arrow-ipc-stream/integration/...`
+use arrow::error::ArrowError;
use arrow::ipc::reader::{FileReader, StreamReader};
use arrow::util::test_util::arrow_test_data;
use arrow_integration_testing::read_gzip_json;
@@ -55,26 +56,12 @@ fn read_0_1_7() {
});
}
-#[test]
-#[should_panic(expected = "Big Endian is not supported for Decimal!")]
-fn read_1_0_0_bigendian_decimal_should_panic() {
- let testdata = arrow_test_data();
- verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_decimal");
-}
-
-#[test]
-#[should_panic(expected = "Last offset 687865856 of Utf8 is larger than values length 41")]
-fn read_1_0_0_bigendian_dictionary_should_panic() {
- // The offsets are not translated for big-endian files
- // https://github.com/apache/arrow-rs/issues/859
- let testdata = arrow_test_data();
- verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_dictionary");
-}
-
#[test]
fn read_1_0_0_bigendian() {
let testdata = arrow_test_data();
let paths = [
+ "generated_decimal",
+ "generated_dictionary",
"generated_interval",
"generated_datetime",
"generated_map",
@@ -91,14 +78,12 @@ fn read_1_0_0_bigendian() {
))
.unwrap();
- FileReader::try_new(file, None).unwrap();
+ let reader = FileReader::try_new(file, None);
- // While the the reader doesn't error but the values are not
- // read correctly on little endian platforms so verifying the
- // contents fails
- //
- // https://github.com/apache/arrow-rs/issues/3459
- //verify_arrow_file(&testdata, "1.0.0-bigendian", path);
+ assert!(reader.is_err());
+ let err = reader.err().unwrap();
+ assert!(matches!(err, ArrowError::IpcError(_)));
+ assert_eq!(err.to_string(), "Ipc error: the endianness of the source system does not match the endianness of the target system.");
});
}
diff --git a/arrow-ipc/src/gen/Schema.rs b/arrow-ipc/src/gen/Schema.rs
index 282b38b671..0dc5dccd39 100644
--- a/arrow-ipc/src/gen/Schema.rs
+++ b/arrow-ipc/src/gen/Schema.rs
@@ -1039,6 +1039,15 @@ impl Endianness {
_ => None,
}
}
+
+ /// Returns true if the endianness of the source system matches the endianness of the target system.
+ pub fn equals_to_target_endianness(self) -> bool {
+ match self {
+ Self::Little => cfg!(target_endian = "little"),
+ Self::Big => cfg!(target_endian = "big"),
+ _ => false,
+ }
+ }
}
impl core::fmt::Debug for Endianness {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 8ac3a387d5..81b8b53073 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -790,6 +790,12 @@ impl FileReaderBuilder {
let total_blocks = blocks.len();
let ipc_schema = footer.schema().unwrap();
+ if !ipc_schema.endianness().equals_to_target_endianness() {
+ return Err(ArrowError::IpcError(
+ "the endianness of the source system does not match the endianness of the target system.".to_owned()
+ ));
+ }
+
let schema = crate::convert::fb_to_schema(ipc_schema);
let mut custom_metadata = HashMap::new();