This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f8acd2b66eb Add OffsetBufferBuilder (#5532)
f8acd2b66eb is described below
commit f8acd2b66eb5984229414f1641a4c9fa2b655953
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Mar 29 07:39:55 2024 +0000
Add OffsetBufferBuilder (#5532)
* Add OffsetsBuilder
* Fix doc
* Update type constraints
---------
Co-authored-by: doki <[email protected]>
---
arrow-buffer/Cargo.toml | 4 ++
arrow-buffer/benches/offset.rs | 49 +++++++++++++++
arrow-buffer/src/buffer/offset.rs | 9 ++-
arrow-buffer/src/builder/mod.rs | 5 +-
arrow-buffer/src/builder/offset.rs | 125 +++++++++++++++++++++++++++++++++++++
5 files changed, 189 insertions(+), 3 deletions(-)
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index 746045cc8dd..8bc33b1874e 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -46,4 +46,8 @@ rand = { version = "0.8", default-features = false, features = ["std", "std_rng"
[[bench]]
name = "i256"
+harness = false
+
+[[bench]]
+name = "offset"
harness = false
\ No newline at end of file
diff --git a/arrow-buffer/benches/offset.rs b/arrow-buffer/benches/offset.rs
new file mode 100644
index 00000000000..1aea5024fbd
--- /dev/null
+++ b/arrow-buffer/benches/offset.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_buffer::{OffsetBuffer, OffsetBufferBuilder};
+use criterion::*;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+const SIZE: usize = 1024;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let mut rng = StdRng::seed_from_u64(42);
+ let lengths: Vec<usize> = black_box((0..SIZE).map(|_| rng.gen_range(0..40)).collect());
+
+ c.bench_function("OffsetBuffer::from_lengths", |b| {
+ b.iter(|| OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()));
+ });
+
+ c.bench_function("OffsetBufferBuilder::push_length", |b| {
+ b.iter(|| {
+ let mut builder = OffsetBufferBuilder::<i32>::new(lengths.len());
+ lengths.iter().for_each(|x| builder.push_length(*x));
+ builder.finish()
+ });
+ });
+
+ let offsets = OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()).into_inner();
+
+ c.bench_function("OffsetBuffer::new", |b| {
+ b.iter(|| OffsetBuffer::new(black_box(offsets.clone())));
+ });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs
index 652d30c3b0a..e9087d30098 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::buffer::ScalarBuffer;
-use crate::{ArrowNativeType, MutableBuffer};
+use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
use std::ops::Deref;
/// A non-empty buffer of monotonically increasing, positive integers.
@@ -55,7 +55,6 @@ use std::ops::Deref;
/// (offsets[i],
/// offsets[i+1])
/// ```
-
#[derive(Debug, Clone)]
pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
@@ -174,6 +173,12 @@ impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
}
}
+impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
+ fn from(value: OffsetBufferBuilder<O>) -> Self {
+ value.finish()
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index d5d5a7d3f18..f7e0e29dace 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -18,9 +18,12 @@
//! Buffer builders
mod boolean;
-pub use boolean::*;
mod null;
+mod offset;
+
+pub use boolean::*;
pub use null::*;
+pub use offset::*;
use crate::{ArrowNativeType, Buffer, MutableBuffer};
use std::{iter, marker::PhantomData};
diff --git a/arrow-buffer/src/builder/offset.rs b/arrow-buffer/src/builder/offset.rs
new file mode 100644
index 00000000000..6a236d2a3e1
--- /dev/null
+++ b/arrow-buffer/src/builder/offset.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ops::Deref;
+
+use crate::{ArrowNativeType, OffsetBuffer};
+
+#[derive(Debug)]
+pub struct OffsetBufferBuilder<O: ArrowNativeType> {
+ offsets: Vec<O>,
+ last_offset: usize,
+}
+
+/// Builder of [`OffsetBuffer`]
+impl<O: ArrowNativeType> OffsetBufferBuilder<O> {
+ /// Create a new builder with space for `capacity + 1` offsets
+ pub fn new(capacity: usize) -> Self {
+ let mut offsets = Vec::with_capacity(capacity + 1);
+ offsets.push(O::usize_as(0));
+ Self {
+ offsets,
+ last_offset: 0,
+ }
+ }
+
+ /// Push a slice of `length` bytes
+ ///
+ /// # Panics
+ ///
+ /// Panics if adding `length` would overflow `usize`
+ #[inline]
+ pub fn push_length(&mut self, length: usize) {
+ self.last_offset = self.last_offset.checked_add(length).expect("overflow");
+ self.offsets.push(O::usize_as(self.last_offset))
+ }
+
+ /// Reserve space for at least `additional` further offsets
+ #[inline]
+ pub fn reserve(&mut self, additional: usize) {
+ self.offsets.reserve(additional);
+ }
+
+ /// Takes the builder itself and returns an [`OffsetBuffer`]
+ ///
+ /// # Panics
+ ///
+ /// Panics if offsets overflow `O`
+ pub fn finish(self) -> OffsetBuffer<O> {
+ O::from_usize(self.last_offset).expect("overflow");
+ unsafe { OffsetBuffer::new_unchecked(self.offsets.into()) }
+ }
+
+ /// Builds the [OffsetBuffer] without resetting the builder.
+ ///
+ /// # Panics
+ ///
+ /// Panics if offsets overflow `O`
+ pub fn finish_cloned(&self) -> OffsetBuffer<O> {
+ O::from_usize(self.last_offset).expect("overflow");
+ unsafe { OffsetBuffer::new_unchecked(self.offsets.clone().into()) }
+ }
+}
+
+impl<O: ArrowNativeType> Deref for OffsetBufferBuilder<O> {
+ type Target = [O];
+
+ fn deref(&self) -> &Self::Target {
+ self.offsets.as_ref()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::OffsetBufferBuilder;
+
+ #[test]
+ fn test_basic() {
+ let mut builder = OffsetBufferBuilder::<i32>::new(5);
+ assert_eq!(builder.len(), 1);
+ assert_eq!(&*builder, &[0]);
+ let finished = builder.finish_cloned();
+ assert_eq!(finished.len(), 1);
+ assert_eq!(&*finished, &[0]);
+
+ builder.push_length(2);
+ builder.push_length(6);
+ builder.push_length(0);
+ builder.push_length(13);
+
+ let finished = builder.finish();
+ assert_eq!(&*finished, &[0, 2, 8, 8, 21]);
+ }
+
+ #[test]
+ #[should_panic(expected = "overflow")]
+ fn test_usize_overflow() {
+ let mut builder = OffsetBufferBuilder::<i32>::new(5);
+ builder.push_length(1);
+ builder.push_length(usize::MAX);
+ builder.finish();
+ }
+
+ #[test]
+ #[should_panic(expected = "overflow")]
+ fn test_i32_overflow() {
+ let mut builder = OffsetBufferBuilder::<i32>::new(5);
+ builder.push_length(1);
+ builder.push_length(i32::MAX as usize);
+ builder.finish();
+ }
+}