This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 6aaff7e38a Add `is_valid` and `truncate` methods to
`NullBufferBuilder` (#7013)
6aaff7e38a is described below
commit 6aaff7e38a573f797b31f89f869c3706cbe26e37
Author: Ian Lai <[email protected]>
AuthorDate: Mon Jan 27 20:25:20 2025 +0800
Add `is_valid` and `truncate` methods to `NullBufferBuilder` (#7013)
* feat: add required methods to NullBufferBuilder
* modify related unit tests
* remove blank
* fix typo & naming
* add more assert in unit tests
* remove the change for sync self.capacity in
append/truncate/new_from_buffer calls
* remove ';'
* refactor NullBufferBuilder methods to use references for bitmap_builder
* Add another test for truncate
* Update arrow-buffer/src/builder/null.rs
* Remove capacity and use allocated_size instead
* fix truncate
---------
Co-authored-by: Ian Lai <[email protected]>
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-buffer/src/builder/null.rs | 77 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 75 insertions(+), 2 deletions(-)
diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs
index f88868db8a..fdd2bb4dfc 100644
--- a/arrow-buffer/src/builder/null.rs
+++ b/arrow-buffer/src/builder/null.rs
@@ -28,6 +28,8 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
/// This optimization is **very** important for the performance as it avoids
/// allocating memory for the null buffer when there are no nulls.
///
+/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
+///
/// # Example
/// ```
/// # use arrow_buffer::NullBufferBuilder;
@@ -46,9 +48,15 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
/// ```
#[derive(Debug)]
pub struct NullBufferBuilder {
+ /// The bitmap builder to store the null buffer:
+ /// * `Some` if any nulls have been appended ("materialized")
+ /// * `None` if no nulls have been appended.
bitmap_builder: Option<BooleanBufferBuilder>,
- /// Store the length of the buffer before materializing.
+ /// Length of the buffer before materializing.
+ ///
+ /// If `bitmap_builder` is `Some`, this value is not used.
len: usize,
+ /// Initial capacity of the `bitmap_builder`, when it is materialized.
capacity: usize,
}
@@ -78,7 +86,6 @@ impl NullBufferBuilder {
/// Creates a new builder from a `MutableBuffer`.
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
let capacity = buffer.len() * 8;
-
assert!(len <= capacity);
let bitmap_builder =
Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
@@ -137,6 +144,28 @@ impl NullBufferBuilder {
}
}
+ /// Gets a bit in the buffer at `index`
+ #[inline]
+ pub fn is_valid(&self, index: usize) -> bool {
+ if let Some(ref buf) = self.bitmap_builder {
+ buf.get_bit(index)
+ } else {
+ true
+ }
+ }
+
+ /// Truncates the builder to the given length
+ ///
+ /// If `len` is greater than the buffer's current length, this has no effect
+ #[inline]
+ pub fn truncate(&mut self, len: usize) {
+ if let Some(buf) = self.bitmap_builder.as_mut() {
+ buf.truncate(len);
+ } else if len <= self.len {
+ self.len = len
+ }
+ }
+
/// Appends a boolean slice into the builder
/// to indicate the validations of these items.
pub fn append_slice(&mut self, slice: &[bool]) {
@@ -221,6 +250,7 @@ mod tests {
builder.append_n_nulls(2);
builder.append_n_non_nulls(2);
assert_eq!(6, builder.len());
+ assert_eq!(512, builder.allocated_size());
let buf = builder.finish().unwrap();
assert_eq!(&[0b110010_u8], buf.validity());
@@ -233,6 +263,7 @@ mod tests {
builder.append_n_nulls(2);
builder.append_slice(&[false, false, false]);
assert_eq!(6, builder.len());
+ assert_eq!(512, builder.allocated_size());
let buf = builder.finish().unwrap();
assert_eq!(&[0b0_u8], buf.validity());
@@ -245,6 +276,7 @@ mod tests {
builder.append_n_non_nulls(2);
builder.append_slice(&[true, true, true]);
assert_eq!(6, builder.len());
+ assert_eq!(0, builder.allocated_size());
let buf = builder.finish();
assert!(buf.is_none());
@@ -266,4 +298,45 @@ mod tests {
let buf = builder.finish().unwrap();
assert_eq!(&[0b1011_u8], buf.validity());
}
+
+ #[test]
+ fn test_null_buffer_builder_is_valid() {
+ let mut builder = NullBufferBuilder::new(0);
+ builder.append_n_non_nulls(6);
+ assert!(builder.is_valid(0));
+
+ builder.append_null();
+ assert!(!builder.is_valid(6));
+
+ builder.append_non_null();
+ assert!(builder.is_valid(7));
+ }
+
+ #[test]
+ fn test_null_buffer_builder_truncate() {
+ let mut builder = NullBufferBuilder::new(10);
+ builder.append_n_non_nulls(16);
+ assert_eq!(builder.as_slice(), None);
+ builder.truncate(20);
+ assert_eq!(builder.as_slice(), None);
+ assert_eq!(builder.len(), 16);
+ assert_eq!(builder.allocated_size(), 0);
+ builder.truncate(14);
+ assert_eq!(builder.as_slice(), None);
+ assert_eq!(builder.len(), 14);
+ builder.append_null();
+ builder.append_non_null();
+ assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
+ assert_eq!(builder.allocated_size(), 512);
+ }
+
+ #[test]
+ fn test_null_buffer_builder_truncate_never_materialized() {
+ let mut builder = NullBufferBuilder::new(0);
+ assert_eq!(builder.len(), 0);
+ builder.append_n_nulls(2); // doesn't materialize
+ assert_eq!(builder.len(), 2);
+ builder.truncate(1);
+ assert_eq!(builder.len(), 1);
+ }
}