This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8345991ad6 Result into error in case of endianness mismatches (#5301)
8345991ad6 is described below

commit 8345991ad662d63feed35356171842c27fb36783
Author: Paolo Angioletti <[email protected]>
AuthorDate: Mon Jan 15 14:27:25 2024 +0000

    Result into error in case of endianness mismatches (#5301)
    
    This commit implements the Byte Order (Endianness) recommendations we
    could read from the Apache Arrow official specification (quoted here):
    
    > _"At first we will return an error when trying to read a Schema
    > with an endianness that does not match the underlying system."_
    
    Resolves: #3459
---
 arrow-integration-testing/tests/ipc_reader.rs | 31 +++++++--------------------
 arrow-ipc/src/gen/Schema.rs                   |  9 ++++++++
 arrow-ipc/src/reader.rs                       |  6 ++++++
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arrow-integration-testing/tests/ipc_reader.rs 
b/arrow-integration-testing/tests/ipc_reader.rs
index 11b8fa8453..88cdad64f9 100644
--- a/arrow-integration-testing/tests/ipc_reader.rs
+++ b/arrow-integration-testing/tests/ipc_reader.rs
@@ -18,6 +18,7 @@
 //! Tests for reading the content of  [`FileReader`] and [`StreamReader`]
 //! in `testing/arrow-ipc-stream/integration/...`
 
+use arrow::error::ArrowError;
 use arrow::ipc::reader::{FileReader, StreamReader};
 use arrow::util::test_util::arrow_test_data;
 use arrow_integration_testing::read_gzip_json;
@@ -55,26 +56,12 @@ fn read_0_1_7() {
     });
 }
 
-#[test]
-#[should_panic(expected = "Big Endian is not supported for Decimal!")]
-fn read_1_0_0_bigendian_decimal_should_panic() {
-    let testdata = arrow_test_data();
-    verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_decimal");
-}
-
-#[test]
-#[should_panic(expected = "Last offset 687865856 of Utf8 is larger than values 
length 41")]
-fn read_1_0_0_bigendian_dictionary_should_panic() {
-    // The offsets are not translated for big-endian files
-    // https://github.com/apache/arrow-rs/issues/859
-    let testdata = arrow_test_data();
-    verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_dictionary");
-}
-
 #[test]
 fn read_1_0_0_bigendian() {
     let testdata = arrow_test_data();
     let paths = [
+        "generated_decimal",
+        "generated_dictionary",
         "generated_interval",
         "generated_datetime",
         "generated_map",
@@ -91,14 +78,12 @@ fn read_1_0_0_bigendian() {
         ))
         .unwrap();
 
-        FileReader::try_new(file, None).unwrap();
+        let reader = FileReader::try_new(file, None);
 
-        // While the the reader doesn't error but the values are not
-        // read correctly on little endian platforms so verifying the
-        // contents fails
-        //
-        // https://github.com/apache/arrow-rs/issues/3459
-        //verify_arrow_file(&testdata, "1.0.0-bigendian", path);
+        assert!(reader.is_err());
+        let err = reader.err().unwrap();
+        assert!(matches!(err, ArrowError::IpcError(_)));
+        assert_eq!(err.to_string(), "Ipc error: the endianness of the source 
system does not match the endianness of the target system.");
     });
 }
 
diff --git a/arrow-ipc/src/gen/Schema.rs b/arrow-ipc/src/gen/Schema.rs
index 282b38b671..0dc5dccd39 100644
--- a/arrow-ipc/src/gen/Schema.rs
+++ b/arrow-ipc/src/gen/Schema.rs
@@ -1039,6 +1039,15 @@ impl Endianness {
             _ => None,
         }
     }
+
+    /// Returns true if the endianness of the source system matches the 
endianness of the target system.
+    pub fn equals_to_target_endianness(self) -> bool {
+        match self {
+            Self::Little => cfg!(target_endian = "little"),
+            Self::Big => cfg!(target_endian = "big"),
+            _ => false,
+        }
+    }
 }
 impl core::fmt::Debug for Endianness {
     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 8ac3a387d5..81b8b53073 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -790,6 +790,12 @@ impl FileReaderBuilder {
         let total_blocks = blocks.len();
 
         let ipc_schema = footer.schema().unwrap();
+        if !ipc_schema.endianness().equals_to_target_endianness() {
+            return Err(ArrowError::IpcError(
+                "the endianness of the source system does not match the 
endianness of the target system.".to_owned()
+            ));
+        }
+
         let schema = crate::convert::fb_to_schema(ipc_schema);
 
         let mut custom_metadata = HashMap::new();

Reply via email to