This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 590f506e4 Fix BitReader::get_batch zero extension (#1708) (#1722)
590f506e4 is described below

commit 590f506e4278c9472968acb59c603e913d712b72
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon May 23 09:43:41 2022 +0100

    Fix BitReader::get_batch zero extension (#1708) (#1722)
    
    * Fix BitReader::get_batch zero extension (#1708)
    
    * Fix tests
---
 parquet/src/util/bit_util.rs | 76 +++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 33 deletions(-)

diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index 67eafd33b..288c771b0 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -568,40 +568,35 @@ impl BitReader {
             }
         }
 
-        unsafe {
-            let in_buf = &self.buffer.data()[self.byte_offset..];
-            let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
-            if size_of::<T>() == 4 {
-                while values_to_read - i >= 32 {
-                    let out_ptr = &mut batch[i..] as *mut [T] as *mut T as *mut u32;
-                    in_ptr = unpack32(in_ptr, out_ptr, num_bits);
-                    self.byte_offset += 4 * num_bits;
-                    i += 32;
-                }
-            } else {
-                let mut out_buf = [0u32; 32];
-                let out_ptr = &mut out_buf as &mut [u32] as *mut [u32] as *mut u32;
-                while values_to_read - i >= 32 {
-                    in_ptr = unpack32(in_ptr, out_ptr, num_bits);
-                    self.byte_offset += 4 * num_bits;
-                    for n in 0..32 {
-                        // We need to copy from smaller size to bigger size to avoid
-                        // overwriting other memory regions.
-                        if size_of::<T>() > size_of::<u32>() {
-                            std::ptr::copy_nonoverlapping(
-                                out_buf[n..].as_ptr() as *const u32,
-                                &mut batch[i] as *mut T as *mut u32,
-                                1,
-                            );
-                        } else {
-                            std::ptr::copy_nonoverlapping(
-                                out_buf[n..].as_ptr() as *const T,
-                                &mut batch[i] as *mut T,
-                                1,
-                            );
-                        }
-                        i += 1;
+        let in_buf = &self.buffer.data()[self.byte_offset..];
+        let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
+        if size_of::<T>() == 4 {
+            while values_to_read - i >= 32 {
+                let out_ptr = &mut batch[i..] as *mut [T] as *mut T as *mut u32;
+                in_ptr = unsafe { unpack32(in_ptr, out_ptr, num_bits) };
+                self.byte_offset += 4 * num_bits;
+                i += 32;
+            }
+        } else {
+            let mut out_buf = [0u32; 32];
+            let out_ptr = &mut out_buf as &mut [u32] as *mut [u32] as *mut u32;
+            while values_to_read - i >= 32 {
+                in_ptr = unsafe { unpack32(in_ptr, out_ptr, num_bits) };
+                self.byte_offset += 4 * num_bits;
+
+                for out in out_buf {
+                    // Zero-allocate buffer
+                    let mut out_bytes = T::Buffer::default();
+                    let in_bytes = out.to_le_bytes();
+
+                    {
+                        let out_bytes = out_bytes.as_mut();
+                        let len = out_bytes.len().min(in_bytes.len());
+                        (&mut out_bytes[..len]).copy_from_slice(&in_bytes[..len]);
                     }
+
+                    batch[i] = T::from_le_bytes(out_bytes);
+                    i += 1;
                 }
             }
         }
@@ -1193,4 +1188,19 @@ mod tests {
             );
         });
     }
+
+    #[test]
+    fn test_get_batch_zero_extend() {
+        let to_read = vec![0xFF; 4];
+        let mut reader = BitReader::new(ByteBufferPtr::new(to_read));
+
+        // Create a non-zeroed output buffer
+        let mut output = [u64::MAX; 32];
+        reader.get_batch(&mut output, 1);
+
+        for v in output {
+            // Values should be read correctly
+            assert_eq!(v, 1);
+        }
+    }
 }

Reply via email to