This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new 1357cd2c7 AVRO-3835: [Rust] Get rid of byteorder and zerocopy dependencies (#2455)
1357cd2c7 is described below

commit 1357cd2c7e186501ed6045e6ac82264a45b6a4e5
Author: Martin Grigorov <[email protected]>
AuthorDate: Fri Aug 18 23:35:02 2023 +0300

    AVRO-3835: [Rust] Get rid of byteorder and zerocopy dependencies (#2455)
    
    Use standard APIs for converting integers to/from byte arrays.
    Get rid of byteorder and zerocopy dependencies.
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    (cherry picked from commit 426c59398754c2b2935e890c0aa16093eda3af3b)
---
 lang/rust/Cargo.lock           | 23 ---------------
 lang/rust/avro/Cargo.toml      |  2 --
 lang/rust/avro/src/codec.rs    | 15 +++++-----
 lang/rust/avro/src/duration.rs | 64 ++++++++++++++++++------------------------
 lang/rust/avro/src/rabin.rs    | 19 +++++++------
 5 files changed, 45 insertions(+), 78 deletions(-)

diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock
index 72488e3a9..bf7fc242a 100644
--- a/lang/rust/Cargo.lock
+++ b/lang/rust/Cargo.lock
@@ -68,7 +68,6 @@ dependencies = [
  "anyhow",
  "apache-avro-derive",
  "apache-avro-test-helper",
- "byteorder",
  "bzip2",
  "crc32fast",
  "criterion",
@@ -93,7 +92,6 @@ dependencies = [
  "typed-builder",
  "uuid",
  "xz2",
- "zerocopy",
  "zstd",
 ]
 
@@ -1348,27 +1346,6 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
 
-[[package]]
-name = "zerocopy"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3b9c234616391070b0b173963ebc65a9195068e7ed3731c6edac2ec45ebe106"
-dependencies = [
- "byteorder",
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f7f3a471f98d0a61c34322fbbfd10c384b07687f680d4119813713f72308d91"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "zstd"
 version = "0.12.4"
diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml
index 5fb216bd6..29ef270d8 100644
--- a/lang/rust/avro/Cargo.toml
+++ b/lang/rust/avro/Cargo.toml
@@ -55,7 +55,6 @@ name = "single"
 
 [dependencies]
 apache-avro-derive = { default-features = false, version = "0.16.0", path = "../avro_derive", optional = true }
-byteorder = { default-features = false, version = "1.4.3" }
 bzip2 = { default-features = false, version = "0.4.4", optional = true }
 crc32fast = { default-features = false, version = "1.3.2", optional = true }
 digest = { default-features = false, version = "0.10.7", features = ["core-api"] }
@@ -73,7 +72,6 @@ thiserror = { default-features = false, version = "1.0.47" }
 typed-builder = { default-features = false, version = "0.15.2" }
 uuid = { default-features = false, version = "1.4.1", features = ["serde", "std"] }
 xz2 = { default-features = false, version = "0.1.7", optional = true }
-zerocopy = { default-features = false, version = "0.6.3" }
 zstd = { default-features = false, version = "0.12.4+zstd.1.5.2", optional = true }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
diff --git a/lang/rust/avro/src/codec.rs b/lang/rust/avro/src/codec.rs
index 4e3634697..a394cad25 100644
--- a/lang/rust/avro/src/codec.rs
+++ b/lang/rust/avro/src/codec.rs
@@ -82,8 +82,6 @@ impl Codec {
             }
             #[cfg(feature = "snappy")]
             Codec::Snappy => {
-                use byteorder::ByteOrder;
-
                 let mut encoded: Vec<u8> = vec![0; snap::raw::max_compress_len(stream.len())];
                 let compressed_size = snap::raw::Encoder::new()
                     .compress(&stream[..], &mut encoded[..])
@@ -92,8 +90,10 @@ impl Codec {
                 let mut hasher = Hasher::new();
                 hasher.update(&stream[..]);
                 let checksum = hasher.finalize();
-                byteorder::BigEndian::write_u32(&mut encoded[compressed_size..], checksum);
-                encoded.truncate(compressed_size + 4);
+                let checksum_as_bytes = checksum.to_be_bytes();
+                let checksum_len = checksum_as_bytes.len();
+                encoded.truncate(compressed_size + checksum_len);
+                encoded[compressed_size..].copy_from_slice(&checksum_as_bytes);
 
                 *stream = encoded;
             }
@@ -137,8 +137,6 @@ impl Codec {
             }
             #[cfg(feature = "snappy")]
             Codec::Snappy => {
-                use byteorder::ByteOrder;
-
                 let decompressed_size = snap::raw::decompress_len(&stream[..stream.len() - 4])
                     .map_err(Error::GetSnappyDecompressLen)?;
                 let mut decoded = vec![0; decompressed_size];
@@ -146,7 +144,10 @@ impl Codec {
                     .decompress(&stream[..stream.len() - 4], &mut decoded[..])
                     .map_err(Error::SnappyDecompress)?;
 
-                let expected = byteorder::BigEndian::read_u32(&stream[stream.len() - 4..]);
+                let mut last_four: [u8; 4] = [0; 4];
+                last_four.copy_from_slice(&stream[(stream.len() - 4)..]);
+                let expected: u32 = u32::from_be_bytes(last_four);
+
                 let mut hasher = Hasher::new();
                 hasher.update(&decoded);
                 let actual = hasher.finalize();
diff --git a/lang/rust/avro/src/duration.rs b/lang/rust/avro/src/duration.rs
index 3bdfe4d23..4aa6bd53a 100644
--- a/lang/rust/avro/src/duration.rs
+++ b/lang/rust/avro/src/duration.rs
@@ -14,10 +14,6 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-
-use byteorder::LittleEndian;
-use zerocopy::U32;
-
 /// A struct representing duration that hides the details of endianness and conversion between
 /// platform-native u32 and byte arrays.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
@@ -28,83 +24,77 @@ pub struct Duration {
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct Months(U32<LittleEndian>);
+pub struct Months(u32);
 
 impl Months {
     pub fn new(months: u32) -> Self {
-        Self(U32::new(months))
+        Self(months)
+    }
+
+    fn as_bytes(&self) -> [u8; 4] {
+        self.0.to_le_bytes()
     }
 }
 
 impl From<Months> for u32 {
     fn from(days: Months) -> Self {
-        days.0.get()
+        days.0
     }
 }
 
 impl From<[u8; 4]> for Months {
     fn from(bytes: [u8; 4]) -> Self {
-        Self(U32::from(bytes))
-    }
-}
-
-impl AsRef<[u8; 4]> for Months {
-    fn as_ref(&self) -> &[u8; 4] {
-        self.0.as_ref()
+        Self(u32::from_le_bytes(bytes))
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct Days(U32<LittleEndian>);
+pub struct Days(u32);
 
 impl Days {
     pub fn new(days: u32) -> Self {
-        Self(U32::new(days))
+        Self(days)
+    }
+
+    fn as_bytes(&self) -> [u8; 4] {
+        self.0.to_le_bytes()
     }
 }
 
 impl From<Days> for u32 {
     fn from(days: Days) -> Self {
-        days.0.get()
+        days.0
     }
 }
 
 impl From<[u8; 4]> for Days {
     fn from(bytes: [u8; 4]) -> Self {
-        Self(U32::from(bytes))
-    }
-}
-
-impl AsRef<[u8; 4]> for Days {
-    fn as_ref(&self) -> &[u8; 4] {
-        self.0.as_ref()
+        Self(u32::from_le_bytes(bytes))
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct Millis(U32<LittleEndian>);
+pub struct Millis(u32);
 
 impl Millis {
     pub fn new(millis: u32) -> Self {
-        Self(U32::new(millis))
+        Self(millis)
+    }
+
+    fn as_bytes(&self) -> [u8; 4] {
+        self.0.to_le_bytes()
     }
 }
 
 impl From<Millis> for u32 {
     fn from(days: Millis) -> Self {
-        days.0.get()
+        days.0
     }
 }
 
 impl From<[u8; 4]> for Millis {
     fn from(bytes: [u8; 4]) -> Self {
-        Self(U32::from(bytes))
-    }
-}
-
-impl AsRef<[u8; 4]> for Millis {
-    fn as_ref(&self) -> &[u8; 4] {
-        self.0.as_ref()
+        Self(u32::from_le_bytes(bytes))
     }
 }
 
@@ -137,9 +127,9 @@ impl Duration {
 impl From<Duration> for [u8; 12] {
     fn from(duration: Duration) -> Self {
         let mut bytes = [0u8; 12];
-        bytes[0..4].copy_from_slice(duration.months.as_ref());
-        bytes[4..8].copy_from_slice(duration.days.as_ref());
-        bytes[8..12].copy_from_slice(duration.millis.as_ref());
+        bytes[0..4].copy_from_slice(&duration.months.as_bytes());
+        bytes[4..8].copy_from_slice(&duration.days.as_bytes());
+        bytes[8..12].copy_from_slice(&duration.millis.as_bytes());
         bytes
     }
 }
diff --git a/lang/rust/avro/src/rabin.rs b/lang/rust/avro/src/rabin.rs
index ce5f0761f..fc63f8999 100644
--- a/lang/rust/avro/src/rabin.rs
+++ b/lang/rust/avro/src/rabin.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 //! Implementation of the Rabin fingerprint algorithm
-use byteorder::{ByteOrder, LittleEndian};
 use digest::{
     consts::U8, core_api::OutputSizeUser, generic_array::GenericArray, FixedOutput,
     FixedOutputReset, HashMarker, Output, Reset, Update,
@@ -61,7 +60,7 @@ lazy_static! {
 /// assert_eq!(result[..], hex!("60335ba6d0415528"));
 /// ```
 ///
-/// To convert the digest to the commonly used 64-bit integer value, you can use the byteorder crate:
+/// To convert the digest to the commonly used 64-bit integer value, you can use the i64::from_le_bytes() function
 ///
 /// ```rust
 /// # use apache_avro::rabin::Rabin;
@@ -75,9 +74,8 @@ lazy_static! {
 /// # let result = hasher.finalize();
 ///
 /// # assert_eq!(result[..], hex!("60335ba6d0415528"));
-/// use byteorder::{ByteOrder, LittleEndian};
 ///
-/// let i = LittleEndian::read_i64(&result.to_vec());
+/// let i = i64::from_le_bytes(result.try_into().unwrap());
 ///
 /// assert_eq!(i, 2906301498937520992)
 /// ```
@@ -103,7 +101,7 @@ impl Update for Rabin {
 
 impl FixedOutput for Rabin {
     fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
-        LittleEndian::write_i64(out, self.result);
+        out.copy_from_slice(&self.result.to_le_bytes());
     }
 }
 
@@ -123,7 +121,7 @@ impl HashMarker for Rabin {}
 
 impl FixedOutputReset for Rabin {
     fn finalize_into_reset(&mut self, out: &mut Output<Self>) {
-        LittleEndian::write_i64(out, self.result);
+        out.copy_from_slice(&self.result.to_le_bytes());
         self.reset();
     }
 }
@@ -131,13 +129,13 @@ impl FixedOutputReset for Rabin {
 #[cfg(test)]
 mod tests {
     use super::Rabin;
-    use byteorder::{ByteOrder, LittleEndian};
+    use apache_avro_test_helper::TestResult;
     use digest::Digest;
     use pretty_assertions::assert_eq;
 
     // See: https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt
     #[test]
-    fn test1() {
+    fn test1() -> TestResult {
         let data: &[(&str, i64)] = &[
             (r#""null""#, 7195948357588979594),
             (r#""boolean""#, -6970731678124411036),
@@ -155,8 +153,11 @@ mod tests {
 
         for (s, fp) in data {
             hasher.update(s.as_bytes());
-            let result = LittleEndian::read_i64(&hasher.finalize_reset());
+            let res: &[u8] = &hasher.finalize_reset();
+            let result = i64::from_le_bytes(res.try_into()?);
             assert_eq!(*fp, result);
         }
+
+        Ok(())
     }
 }

Reply via email to