tustvold commented on code in PR #5440:
URL: https://github.com/apache/arrow-rs/pull/5440#discussion_r1525624215


##########
arrow-buffer/src/lib.rs:
##########
@@ -21,17 +21,15 @@
 #![cfg_attr(miri, feature(strict_provenance))]
 
 pub mod alloc;
+mod bigint;
 pub mod buffer;
-pub use buffer::*;
-
 pub mod builder;
-pub use builder::*;
-
-mod bigint;
 mod bytes;
 mod native;
-pub use bigint::i256;
+mod util;
 
+pub use bigint::i256;

Review Comment:
   Can we revert this formatting please



##########
arrow-buffer/src/builder/offset.rs:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ops::{Add, Sub};
+
+use crate::{ArrowNativeType, OffsetBuffer};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct OffsetBufferBuilder<O: ArrowNativeType> {
+    offsets: Vec<O>,
+}
+
+/// Builder of [`OffsetBuffer`]
+impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = O>> 
OffsetBufferBuilder<O> {
+    /// Create a new builder containing only 1 zero offset.
+    pub fn new(capacity: usize) -> Self {
+        let mut offsets = Vec::with_capacity(capacity + 1);
+        offsets.push(O::usize_as(0));
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create a new builder containing capacity number of zero offsets.
+    pub fn new_zeroed(capacity: usize) -> Self {
+        let offsets = vec![O::usize_as(0); capacity + 1];
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create from offsets.
+    /// # Safety
+    /// Caller guarantees that offsets are monotonically increasing values.
+    #[inline]
+    pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
+        Self { offsets }
+    }
+
+    /// Try to safely push a length of usize type into builder
+    #[inline]
+    pub fn try_push_length(&mut self, length: usize) -> Result<(), String> {
+        let last_offset = self.offsets.last().unwrap();
+        let next_offset =
+            *last_offset + O::from_usize(length).ok_or("offset 
overflow".to_string())?;
+        self.offsets.push(next_offset);
+        Ok(())
+    }
+
+    /// Push a length of usize type without overflow checking.
+    /// # Safety
+    /// This doesn't check offset overflow, the caller must ensure it.
+    #[inline]
+    pub unsafe fn push_length_unchecked(&mut self, length: usize) {
+        let last_offset = self.offsets.last().unwrap();
+        let next_offset = *last_offset + O::usize_as(length);
+        self.offsets.push(next_offset);
+    }
+
+    /// Takes the builder itself and returns an [`OffsetBuffer`]
+    pub fn finish(self) -> OffsetBuffer<O> {
+        unsafe { OffsetBuffer::new_unchecked(self.offsets.into()) }
+    }
+
+    /// Capacity of offsets
+    pub fn capacity(&self) -> usize {
+        self.offsets.capacity() - 1
+    }
+
+    /// Length of the Offsets
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        self.offsets.len()
+    }
+
+    /// Last offset
+    pub fn last(&self) -> O {
+        *self.offsets.last().unwrap()
+    }
+
+    pub fn reserve(&mut self, additional: usize) {
+        self.offsets.reserve(additional);
+    }
+
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.offsets.reserve_exact(additional);
+    }
+
+    pub fn shrink_to_fit(&mut self) {
+        self.offsets.shrink_to_fit();
+    }
+}
+
+/// Convert an [`IntoIterator`] of lengths to [`OffsetBufferBuilder`]
+impl<IntoIter: IntoIterator<Item = O>, O: ArrowNativeType + Add<Output = O> + 
Sub<Output = O>>
+    From<IntoIter> for OffsetBufferBuilder<O>
+{
+    fn from(lengths: IntoIter) -> Self {
+        let mut offsets_vec: Vec<O> = lengths
+            .into_iter()
+            .scan(O::usize_as(0), |prev, len| {
+                *prev = *prev + len;
+                Some(*prev)
+            })
+            .collect();
+        offsets_vec.insert(0, O::usize_as(0));
+        unsafe { OffsetBufferBuilder::new_unchecked(offsets_vec) }
+    }
+}
+
+/// Convert an [`FromIterator`] of lengths to [`OffsetBufferBuilder`]
+impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = O>> FromIterator<O>

Review Comment:
   Can we remove these FromIterator and IntoIter methods, I think the following 
would be very confusing, as it isn't immediately clear the iterator is lengths 
not offsets (especially given the `O` typing)
   
   ```
   let x = [0_i32, 3_i32, 
6_i32].into_iter().collect::<OffsetBufferBuilder>().finish();
   assert_eq!(&x, &[0_i32, 0, 3, 9])
   ```



##########
arrow-buffer/src/builder/offset.rs:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ops::{Add, Sub};
+
+use crate::{ArrowNativeType, OffsetBuffer};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct OffsetBufferBuilder<O: ArrowNativeType> {
+    offsets: Vec<O>,
+}
+
+/// Builder of [`OffsetBuffer`]
+impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = O>> 
OffsetBufferBuilder<O> {
+    /// Create a new builder containing only 1 zero offset.
+    pub fn new(capacity: usize) -> Self {
+        let mut offsets = Vec::with_capacity(capacity + 1);
+        offsets.push(O::usize_as(0));
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create a new builder containing capacity number of zero offsets.
+    pub fn new_zeroed(capacity: usize) -> Self {
+        let offsets = vec![O::usize_as(0); capacity + 1];
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create from offsets.
+    /// # Safety
+    /// Caller guarantees that offsets are monotonically increasing values.
+    #[inline]
+    pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
+        Self { offsets }
+    }
+
+    /// Try to safely push a length of usize type into builder
+    #[inline]
+    pub fn try_push_length(&mut self, length: usize) -> Result<(), String> {
+        let last_offset = self.offsets.last().unwrap();
+        let next_offset =

Review Comment:
   This needs to be checked addition. I would recommend doing checked addition 
using `usize`, and then convert. This will simplify the generics



##########
arrow-buffer/src/builder/offset.rs:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ops::{Add, Sub};
+
+use crate::{ArrowNativeType, OffsetBuffer};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct OffsetBufferBuilder<O: ArrowNativeType> {
+    offsets: Vec<O>,
+}
+
+/// Builder of [`OffsetBuffer`]
+impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = O>> 
OffsetBufferBuilder<O> {
+    /// Create a new builder containing only 1 zero offset.
+    pub fn new(capacity: usize) -> Self {
+        let mut offsets = Vec::with_capacity(capacity + 1);
+        offsets.push(O::usize_as(0));
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create a new builder containing capacity number of zero offsets.
+    pub fn new_zeroed(capacity: usize) -> Self {
+        let offsets = vec![O::usize_as(0); capacity + 1];
+        unsafe { Self::new_unchecked(offsets) }
+    }
+
+    /// Create from offsets.
+    /// # Safety
+    /// Caller guarantees that offsets are monotonically increasing values.

Review Comment:
   It also needs to guarantee that the array is not empty, see 
https://docs.rs/arrow-buffer/latest/arrow_buffer/buffer/struct.OffsetBuffer.html#method.new_unchecked



##########
arrow-buffer/src/builder/offset.rs:
##########
@@ -142,9 +135,9 @@ impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = 
O>> Extend<O> for Offse
             None => lengths_iter.size_hint().0,
         };
         self.reserve(size_hint);
-        for len in lengths_iter {
-            self.push_length(len);
-        }
+        lengths_iter.for_each(|len| unsafe {

Review Comment:
   Why is this safe?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to