This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f8acd2b66eb Add OffsetBufferBuilder (#5532)
f8acd2b66eb is described below

commit f8acd2b66eb5984229414f1641a4c9fa2b655953
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Mar 29 07:39:55 2024 +0000

    Add OffsetBufferBuilder (#5532)
    
    * Add OffsetsBuilder
    
    * Fix doc
    
    * Update type constraints
    
    ---------
    
    Co-authored-by: doki <[email protected]>
---
 arrow-buffer/Cargo.toml            |   4 ++
 arrow-buffer/benches/offset.rs     |  49 +++++++++++++++
 arrow-buffer/src/buffer/offset.rs  |   9 ++-
 arrow-buffer/src/builder/mod.rs    |   5 +-
 arrow-buffer/src/builder/offset.rs | 125 +++++++++++++++++++++++++++++++++++++
 5 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index 746045cc8dd..8bc33b1874e 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -46,4 +46,8 @@ rand = { version = "0.8", default-features = false, features = ["std", "std_rng"
 
 [[bench]]
 name = "i256"
+harness = false
+
+[[bench]]
+name = "offset"
 harness = false
\ No newline at end of file
diff --git a/arrow-buffer/benches/offset.rs b/arrow-buffer/benches/offset.rs
new file mode 100644
index 00000000000..1aea5024fbd
--- /dev/null
+++ b/arrow-buffer/benches/offset.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_buffer::{OffsetBuffer, OffsetBufferBuilder};
+use criterion::*;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+const SIZE: usize = 1024;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut rng = StdRng::seed_from_u64(42);
+    let lengths: Vec<usize> = black_box((0..SIZE).map(|_| rng.gen_range(0..40)).collect());
+
+    c.bench_function("OffsetBuffer::from_lengths", |b| {
+        b.iter(|| OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()));
+    });
+
+    c.bench_function("OffsetBufferBuilder::push_length", |b| {
+        b.iter(|| {
+            let mut builder = OffsetBufferBuilder::<i32>::new(lengths.len());
+            lengths.iter().for_each(|x| builder.push_length(*x));
+            builder.finish()
+        });
+    });
+
+    let offsets = OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()).into_inner();
+
+    c.bench_function("OffsetBuffer::new", |b| {
+        b.iter(|| OffsetBuffer::new(black_box(offsets.clone())));
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs
index 652d30c3b0a..e9087d30098 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::buffer::ScalarBuffer;
-use crate::{ArrowNativeType, MutableBuffer};
+use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
 use std::ops::Deref;
 
 /// A non-empty buffer of monotonically increasing, positive integers.
@@ -55,7 +55,6 @@ use std::ops::Deref;
 ///  (offsets[i],
 ///   offsets[i+1])
 /// ```
-
 #[derive(Debug, Clone)]
 pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
 
@@ -174,6 +173,12 @@ impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
     }
 }
 
+impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
+    fn from(value: OffsetBufferBuilder<O>) -> Self {
+        value.finish()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index d5d5a7d3f18..f7e0e29dace 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -18,9 +18,12 @@
 //! Buffer builders
 
 mod boolean;
-pub use boolean::*;
 mod null;
+mod offset;
+
+pub use boolean::*;
 pub use null::*;
+pub use offset::*;
 
 use crate::{ArrowNativeType, Buffer, MutableBuffer};
 use std::{iter, marker::PhantomData};
diff --git a/arrow-buffer/src/builder/offset.rs b/arrow-buffer/src/builder/offset.rs
new file mode 100644
index 00000000000..6a236d2a3e1
--- /dev/null
+++ b/arrow-buffer/src/builder/offset.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ops::Deref;
+
+use crate::{ArrowNativeType, OffsetBuffer};
+
+#[derive(Debug)]
+pub struct OffsetBufferBuilder<O: ArrowNativeType> {
+    offsets: Vec<O>,
+    last_offset: usize,
+}
+
+/// Builder of [`OffsetBuffer`]
+impl<O: ArrowNativeType> OffsetBufferBuilder<O> {
+    /// Create a new builder with space for `capacity + 1` offsets
+    pub fn new(capacity: usize) -> Self {
+        let mut offsets = Vec::with_capacity(capacity + 1);
+        offsets.push(O::usize_as(0));
+        Self {
+            offsets,
+            last_offset: 0,
+        }
+    }
+
+    /// Push a slice of `length` bytes
+    ///
+    /// # Panics
+    ///
+    /// Panics if adding `length` would overflow `usize`
+    #[inline]
+    pub fn push_length(&mut self, length: usize) {
+        self.last_offset = self.last_offset.checked_add(length).expect("overflow");
+        self.offsets.push(O::usize_as(self.last_offset))
+    }
+
+    /// Reserve space for at least `additional` further offsets
+    #[inline]
+    pub fn reserve(&mut self, additional: usize) {
+        self.offsets.reserve(additional);
+    }
+
+    /// Takes the builder itself and returns an [`OffsetBuffer`]
+    ///
+    /// # Panics
+    ///
+    /// Panics if offsets overflow `O`
+    pub fn finish(self) -> OffsetBuffer<O> {
+        O::from_usize(self.last_offset).expect("overflow");
+        unsafe { OffsetBuffer::new_unchecked(self.offsets.into()) }
+    }
+
+    /// Builds the [OffsetBuffer] without resetting the builder.
+    ///
+    /// # Panics
+    ///
+    /// Panics if offsets overflow `O`
+    pub fn finish_cloned(&self) -> OffsetBuffer<O> {
+        O::from_usize(self.last_offset).expect("overflow");
+        unsafe { OffsetBuffer::new_unchecked(self.offsets.clone().into()) }
+    }
+}
+
+impl<O: ArrowNativeType> Deref for OffsetBufferBuilder<O> {
+    type Target = [O];
+
+    fn deref(&self) -> &Self::Target {
+        self.offsets.as_ref()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::OffsetBufferBuilder;
+
+    #[test]
+    fn test_basic() {
+        let mut builder = OffsetBufferBuilder::<i32>::new(5);
+        assert_eq!(builder.len(), 1);
+        assert_eq!(&*builder, &[0]);
+        let finished = builder.finish_cloned();
+        assert_eq!(finished.len(), 1);
+        assert_eq!(&*finished, &[0]);
+
+        builder.push_length(2);
+        builder.push_length(6);
+        builder.push_length(0);
+        builder.push_length(13);
+
+        let finished = builder.finish();
+        assert_eq!(&*finished, &[0, 2, 8, 8, 21]);
+    }
+
+    #[test]
+    #[should_panic(expected = "overflow")]
+    fn test_usize_overflow() {
+        let mut builder = OffsetBufferBuilder::<i32>::new(5);
+        builder.push_length(1);
+        builder.push_length(usize::MAX);
+        builder.finish();
+    }
+
+    #[test]
+    #[should_panic(expected = "overflow")]
+    fn test_i32_overflow() {
+        let mut builder = OffsetBufferBuilder::<i32>::new(5);
+        builder.push_length(1);
+        builder.push_length(i32::MAX as usize);
+        builder.finish();
+    }
+}

Reply via email to