This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch miniz_oxide_improvements in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 1664687925a199bdcb9aaa4bc58563be6b20e4d3 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Mon Apr 7 11:43:29 2025 +0300 Fixes #173 - Introduce DeflateSettings At the moment it could be used to specify the compression level for Codec::Deflate Signed-off-by: Martin Tzvetanov Grigorov <[email protected]> --- avro/src/codec.rs | 40 ++++++++++++++++++++++++++++++++-------- avro/src/lib.rs | 10 +++++----- avro/src/writer.rs | 13 +++++++++---- avro/tests/codecs.rs | 4 ++-- 4 files changed, 48 insertions(+), 19 deletions(-) diff --git a/avro/src/codec.rs b/avro/src/codec.rs index 28f3a5e..434ea7b 100644 --- a/avro/src/codec.rs +++ b/avro/src/codec.rs @@ -19,6 +19,23 @@ use crate::{types::Value, AvroResult, Error}; use strum_macros::{EnumIter, EnumString, IntoStaticStr}; +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +pub struct DeflateSettings { + compression_level: miniz_oxide::deflate::CompressionLevel, +} + +impl DeflateSettings { + fn new(compression_level: miniz_oxide::deflate::CompressionLevel) -> Self { + DeflateSettings { compression_level } + } +} + +impl Default for DeflateSettings { + fn default() -> Self { + Self::new(miniz_oxide::deflate::CompressionLevel::DefaultCompression) + } +} + /// The compression codec used to compress blocks. #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumIter, EnumString, IntoStaticStr)] #[strum(serialize_all = "kebab_case")] @@ -28,7 +45,7 @@ pub enum Codec { /// The `Deflate` codec writes the data block using the deflate algorithm /// as specified in RFC 1951, and typically implemented using the zlib library. /// Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. - Deflate, + Deflate(DeflateSettings), #[cfg(feature = "snappy")] /// The `Snappy` codec uses Google's [Snappy](http://google.github.io/snappy/) /// compression library. Each compressed block is followed by the 4-byte, big-endian @@ -58,8 +75,9 @@ impl Codec { pub fn compress(self, stream: &mut Vec<u8>) -> AvroResult<()> { match self { Codec::Null => (), - Codec::Deflate => { - let compressed = miniz_oxide::deflate::compress_to_vec(stream, 6); + Codec::Deflate(settings) => { + let compressed = + miniz_oxide::deflate::compress_to_vec(stream, settings.compression_level as u8); *stream = compressed; } #[cfg(feature = "snappy")] @@ -99,8 +117,8 @@ impl Codec { } #[cfg(feature = "xz")] Codec::Xz(settings) => { - use xz2::read::XzEncoder; use std::io::Read; + use xz2::read::XzEncoder; let mut encoder = XzEncoder::new(&stream[..], settings.compression_level as u32); let mut buffer = Vec::new(); @@ -116,7 +134,7 @@ impl Codec { pub fn decompress(self, stream: &mut Vec<u8>) -> AvroResult<()> { *stream = match self { Codec::Null => return Ok(()), - Codec::Deflate => miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| { + Codec::Deflate(_settings) => miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| { let err = { use miniz_oxide::inflate::TINFLStatus::*; use std::io::{Error,ErrorKind}; @@ -278,7 +296,7 @@ mod tests { #[test] fn deflate_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Deflate) + compress_and_decompress(Codec::Deflate(DeflateSettings::default())) } #[cfg(feature = "snappy")] @@ -318,7 +336,10 @@ mod tests { #[test] fn codec_to_str() { assert_eq!(<&str>::from(Codec::Null), "null"); - assert_eq!(<&str>::from(Codec::Deflate), "deflate"); + assert_eq!( + <&str>::from(Codec::Deflate(DeflateSettings::default())), + "deflate" + ); #[cfg(feature = "snappy")] assert_eq!(<&str>::from(Codec::Snappy), "snappy"); @@ -344,7 +365,10 @@ mod tests { use std::str::FromStr; assert_eq!(Codec::from_str("null").unwrap(), Codec::Null); - assert_eq!(Codec::from_str("deflate").unwrap(), Codec::Deflate); + assert_eq!( + Codec::from_str("deflate").unwrap(), + Codec::Deflate(DeflateSettings::default()) + ); #[cfg(feature = "snappy")] assert_eq!(Codec::from_str("snappy").unwrap(), Codec::Snappy); diff --git a/avro/src/lib.rs b/avro/src/lib.rs index 4b95bb5..8e42912 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -326,7 +326,7 @@ //! # } //! # "#; //! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); +//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default())); //! ``` //! //! # Reading data @@ -507,7 +507,7 @@ //! quick reference of the library interface: //! //! ``` -//! use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record, Error}; +//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value, types::Record, Error}; //! use serde::{Deserialize, Serialize}; //! //! #[derive(Debug, Deserialize, Serialize)] @@ -532,7 +532,7 @@ //! //! println!("{:?}", schema); //! -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); +//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default())); //! //! let mut record = Record::new(writer.schema()).unwrap(); //! record.put("a", 27i64); @@ -658,7 +658,7 @@ //! //! println!("{:?}", schema); //! -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); +//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default())); //! //! let mut record = Record::new(writer.schema()).unwrap(); //! record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be())); @@ -893,7 +893,7 @@ pub use codec::bzip::Bzip2Settings; pub use codec::xz::XzSettings; #[cfg(feature = "zstandard")] pub use codec::zstandard::ZstandardSettings; -pub use codec::Codec; +pub use codec::{Codec, DeflateSettings}; pub use de::from_value; pub use decimal::Decimal; pub use duration::{Days, Duration, Millis, Months}; diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 1ff1339..ed41f0e 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -709,6 +709,7 @@ mod tests { use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; + use crate::codec::DeflateSettings; use apache_avro_test_helper::TestResult; const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len(); @@ -1065,14 +1066,18 @@ mod tests { } fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec<u8>> { - Writer::with_codec(schema, Vec::new(), Codec::Deflate) + Writer::with_codec( + schema, + Vec::new(), + Codec::Deflate(DeflateSettings::default()), + ) } fn make_writer_with_builder(schema: &Schema) -> Writer<'_, Vec<u8>> { Writer::builder() .writer(Vec::new()) .schema(schema) - .codec(Codec::Deflate) + .codec(Codec::Deflate(DeflateSettings::default())) .block_size(100) .build() } @@ -1094,7 +1099,7 @@ mod tests { zig_i64(3, &mut data)?; data.extend(b"foo"); data.extend(data.clone()); - Codec::Deflate.compress(&mut data)?; + Codec::Deflate(DeflateSettings::default()).compress(&mut data)?; // starts with magic assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); @@ -1142,7 +1147,7 @@ mod tests { ] } "#; - let codec = Codec::Deflate; + let codec = Codec::Deflate(DeflateSettings::default()); let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?; let mut writer = Writer::builder() .schema(&schema) diff --git a/avro/tests/codecs.rs b/avro/tests/codecs.rs index 5017d33..1612ca3 100644 --- a/avro/tests/codecs.rs +++ b/avro/tests/codecs.rs @@ -17,7 +17,7 @@ use apache_avro::{ types::{Record, Value}, - Codec, Reader, Schema, Writer, + Codec, DeflateSettings, Reader, Schema, Writer, }; use apache_avro_test_helper::TestResult; @@ -27,7 +27,7 @@ fn avro_4032_null_codec_settings() -> TestResult { } #[test] fn avro_4032_deflate_codec_settings() -> TestResult { - avro_4032_codec_settings(Codec::Deflate) + avro_4032_codec_settings(Codec::Deflate(DeflateSettings::default())) } #[test]
