tustvold commented on code in PR #3013:
URL: https://github.com/apache/arrow-rs/pull/3013#discussion_r1014754258


##########
parquet/src/compression.rs:
##########
@@ -414,15 +472,202 @@ mod lz4_raw_codec {
 #[cfg(any(feature = "lz4", test))]
 pub use lz4_raw_codec::*;
 
+#[cfg(any(feature = "lz4", test))]
+mod lz4_hadoop_codec {
+    use crate::compression::lz4_codec::LZ4Codec;
+    use crate::compression::lz4_raw_codec::LZ4RawCodec;
+    use crate::compression::Codec;
+    use crate::errors::{ParquetError, Result};
+    use std::io;
+
+    /// Size of u32 type.
+    const SIZE_U32: usize = std::mem::size_of::<u32>();
+
+    /// Length of the LZ4_HADOOP prefix.
+    const PREFIX_LEN: usize = SIZE_U32 * 2;
+
+    /// Codec for LZ4 Hadoop compression algorithm.
+    pub struct LZ4HadoopCodec {
+        /// Whether or not to fallback to other LZ4 implementations on error.
+        /// Fallback is done to be backward compatible with older versions of this
+        /// library and older versions parquet-cpp.
+        backward_compatible_lz4: bool,
+    }
+
+    impl LZ4HadoopCodec {
+        /// Creates new LZ4 Hadoop compression codec.
+        pub(crate) fn new(backward_compatible_lz4: bool) -> Self {
+            Self {
+                backward_compatible_lz4,
+            }
+        }
+    }
+
+    /// Try to decompress the buffer as if it was compressed with the Hadoop Lz4Codec.
+    /// Adapted from pola-rs [compression.rs:try_decompress_hadoop](https://pola-rs.github.io/polars/src/parquet2/compression.rs.html#225)

Review Comment:
   :+1: nice to see attribution



##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -2422,6 +2422,76 @@ mod tests {
         assert_eq!(a.values(), &[42.000000, 7.700000, 42.125000, 7.700000]);
     }
 
+    // This test is to ensure backward compatibility, it test 2 files containing the LZ4 CompressionCodec

Review Comment:
   Love the tests



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to