tustvold commented on code in PR #3013: URL: https://github.com/apache/arrow-rs/pull/3013#discussion_r1014754258
########## parquet/src/compression.rs: ########## @@ -414,15 +472,202 @@ mod lz4_raw_codec { #[cfg(any(feature = "lz4", test))] pub use lz4_raw_codec::*; +#[cfg(any(feature = "lz4", test))] +mod lz4_hadoop_codec { + use crate::compression::lz4_codec::LZ4Codec; + use crate::compression::lz4_raw_codec::LZ4RawCodec; + use crate::compression::Codec; + use crate::errors::{ParquetError, Result}; + use std::io; + + /// Size of u32 type. + const SIZE_U32: usize = std::mem::size_of::<u32>(); + + /// Length of the LZ4_HADOOP prefix. + const PREFIX_LEN: usize = SIZE_U32 * 2; + + /// Codec for LZ4 Hadoop compression algorithm. + pub struct LZ4HadoopCodec { + /// Whether or not to fall back to other LZ4 implementations on error. + /// Fallback is done to be backward compatible with older versions of this + /// library and older versions of parquet-cpp. + backward_compatible_lz4: bool, + } + + impl LZ4HadoopCodec { + /// Creates new LZ4 Hadoop compression codec. + pub(crate) fn new(backward_compatible_lz4: bool) -> Self { + Self { + backward_compatible_lz4, + } + } + } + + /// Try to decompress the buffer as if it was compressed with the Hadoop Lz4Codec. + /// Adapted from pola-rs [compression.rs:try_decompress_hadoop](https://pola-rs.github.io/polars/src/parquet2/compression.rs.html#225) Review Comment: :+1: nice to see attribution ########## parquet/src/arrow/arrow_reader/mod.rs: ########## @@ -2422,6 +2422,76 @@ mod tests { assert_eq!(a.values(), &[42.000000, 7.700000, 42.125000, 7.700000]); } + // This test is to ensure backward compatibility; it tests 2 files containing the LZ4 CompressionCodec Review Comment: Love the tests -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org