This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6aaff7e38a Add `is_valid` and `truncate` methods to `NullBufferBuilder` (#7013)
6aaff7e38a is described below

commit 6aaff7e38a573f797b31f89f869c3706cbe26e37
Author: Ian Lai <[email protected]>
AuthorDate: Mon Jan 27 20:25:20 2025 +0800

    Add `is_valid` and `truncate` methods to `NullBufferBuilder` (#7013)
    
    * feat: add required methods to NullBufferBuilder
    
    * modify related unit tests
    
    * remove blank
    
    * fix typo & naming
    
    * add more assert in unit tests
    
    * remove the change for sync self.capacity in append/truncate/new_from_buffer calls
    
    * remove ';'
    
    * refactor NullBufferBuilder methods to use references for bitmap_builder
    
    * Add another test for truncate
    
    * Update arrow-buffer/src/builder/null.rs
    
    * Remove capacity and use allocated_size instead
    
    * fix truncate
    
    ---------
    
    Co-authored-by: Ian Lai <[email protected]>
    Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-buffer/src/builder/null.rs | 77 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs
index f88868db8a..fdd2bb4dfc 100644
--- a/arrow-buffer/src/builder/null.rs
+++ b/arrow-buffer/src/builder/null.rs
@@ -28,6 +28,8 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
 /// This optimization is **very** important for the performance as it avoids
 /// allocating memory for the null buffer when there are no nulls.
 ///
+/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
+///
 /// # Example
 /// ```
 /// # use arrow_buffer::NullBufferBuilder;
@@ -46,9 +48,15 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
 /// ```
 #[derive(Debug)]
 pub struct NullBufferBuilder {
+    /// The bitmap builder to store the null buffer:
+    /// * `Some` if any nulls have been appended ("materialized")
+    /// * `None` if no nulls have been appended.
     bitmap_builder: Option<BooleanBufferBuilder>,
-    /// Store the length of the buffer before materializing.
+    /// Length of the buffer before materializing.
+    ///
+    /// If `bitmap_builder` is `Some`, this value is not used.
     len: usize,
+    /// Initial capacity of the `bitmap_builder`, when it is materialized.
     capacity: usize,
 }
 
@@ -78,7 +86,6 @@ impl NullBufferBuilder {
     /// Creates a new builder from a `MutableBuffer`.
     pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
         let capacity = buffer.len() * 8;
-
         assert!(len <= capacity);
 
         let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
@@ -137,6 +144,28 @@ impl NullBufferBuilder {
         }
     }
 
+    /// Gets a bit in the buffer at `index`
+    #[inline]
+    pub fn is_valid(&self, index: usize) -> bool {
+        if let Some(ref buf) = self.bitmap_builder {
+            buf.get_bit(index)
+        } else {
+            true
+        }
+    }
+
+    /// Truncates the builder to the given length
+    ///
+    /// If `len` is greater than the buffer's current length, this has no effect
+    #[inline]
+    pub fn truncate(&mut self, len: usize) {
+        if let Some(buf) = self.bitmap_builder.as_mut() {
+            buf.truncate(len);
+        } else if len <= self.len {
+            self.len = len
+        }
+    }
+
     /// Appends a boolean slice into the builder
     /// to indicate the validations of these items.
     pub fn append_slice(&mut self, slice: &[bool]) {
@@ -221,6 +250,7 @@ mod tests {
         builder.append_n_nulls(2);
         builder.append_n_non_nulls(2);
         assert_eq!(6, builder.len());
+        assert_eq!(512, builder.allocated_size());
 
         let buf = builder.finish().unwrap();
         assert_eq!(&[0b110010_u8], buf.validity());
@@ -233,6 +263,7 @@ mod tests {
         builder.append_n_nulls(2);
         builder.append_slice(&[false, false, false]);
         assert_eq!(6, builder.len());
+        assert_eq!(512, builder.allocated_size());
 
         let buf = builder.finish().unwrap();
         assert_eq!(&[0b0_u8], buf.validity());
@@ -245,6 +276,7 @@ mod tests {
         builder.append_n_non_nulls(2);
         builder.append_slice(&[true, true, true]);
         assert_eq!(6, builder.len());
+        assert_eq!(0, builder.allocated_size());
 
         let buf = builder.finish();
         assert!(buf.is_none());
@@ -266,4 +298,45 @@ mod tests {
         let buf = builder.finish().unwrap();
         assert_eq!(&[0b1011_u8], buf.validity());
     }
+
+    #[test]
+    fn test_null_buffer_builder_is_valid() {
+        let mut builder = NullBufferBuilder::new(0);
+        builder.append_n_non_nulls(6);
+        assert!(builder.is_valid(0));
+
+        builder.append_null();
+        assert!(!builder.is_valid(6));
+
+        builder.append_non_null();
+        assert!(builder.is_valid(7));
+    }
+
+    #[test]
+    fn test_null_buffer_builder_truncate() {
+        let mut builder = NullBufferBuilder::new(10);
+        builder.append_n_non_nulls(16);
+        assert_eq!(builder.as_slice(), None);
+        builder.truncate(20);
+        assert_eq!(builder.as_slice(), None);
+        assert_eq!(builder.len(), 16);
+        assert_eq!(builder.allocated_size(), 0);
+        builder.truncate(14);
+        assert_eq!(builder.as_slice(), None);
+        assert_eq!(builder.len(), 14);
+        builder.append_null();
+        builder.append_non_null();
+        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
+        assert_eq!(builder.allocated_size(), 512);
+    }
+
+    #[test]
+    fn test_null_buffer_builder_truncate_never_materialized() {
+        let mut builder = NullBufferBuilder::new(0);
+        assert_eq!(builder.len(), 0);
+        builder.append_n_nulls(2); // doesn't materialize
+        assert_eq!(builder.len(), 2);
+        builder.truncate(1);
+        assert_eq!(builder.len(), 1);
+    }
 }

Reply via email to