This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2adb64d113 Move from_iter_values to GenericByteArray (#4586)
2adb64d113 is described below

commit 2adb64d113a031432cb4e9e0e37c071ce85ca6d6
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sun Jul 30 14:14:18 2023 +0100

    Move from_iter_values to GenericByteArray (#4586)
---
 arrow-array/src/array/binary_array.rs | 37 ---------------------------------
 arrow-array/src/array/byte_array.rs   | 35 +++++++++++++++++++++++++++++++
 arrow-array/src/array/string_array.rs | 39 +----------------------------------
 3 files changed, 36 insertions(+), 75 deletions(-)

diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs
index 54839604d1..67be3768cc 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -19,7 +19,6 @@ use crate::types::{ByteArrayType, GenericBinaryType};
 use crate::{
     Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait,
 };
-use arrow_buffer::MutableBuffer;
 use arrow_data::ArrayData;
 use arrow_schema::DataType;
 
@@ -83,42 +82,6 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
         Self::from(data)
     }
 
-    /// Creates a [`GenericBinaryArray`] based on an iterator of values without nulls
-    pub fn from_iter_values<Ptr, I>(iter: I) -> Self
-    where
-        Ptr: AsRef<[u8]>,
-        I: IntoIterator<Item = Ptr>,
-    {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let mut offsets =
-            MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
-        let mut values = MutableBuffer::new(0);
-
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        for s in iter {
-            let s = s.as_ref();
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            offsets.push(length_so_far);
-            values.extend_from_slice(s);
-        }
-
-        // iterator size hint may not be correct so compute the actual number of offsets
-        assert!(!offsets.is_empty()); // wrote at least one
-        let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;
-
-        let array_data = ArrayData::builder(Self::DATA_TYPE)
-            .len(actual_len)
-            .add_buffer(offsets.into())
-            .add_buffer(values.into());
-        let array_data = unsafe { array_data.build_unchecked() };
-        Self::from(array_data)
-    }
-
     /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
     pub fn take_iter<'a>(
         &'a self,
diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index be10a4508d..f694aa32e5 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -182,6 +182,41 @@ impl<T: ByteArrayType> GenericByteArray<T> {
         }
     }
 
+    /// Creates a [`GenericByteArray`] based on an iterator of values without nulls
+    pub fn from_iter_values<Ptr, I>(iter: I) -> Self
+    where
+        Ptr: AsRef<T::Native>,
+        I: IntoIterator<Item = Ptr>,
+    {
+        let iter = iter.into_iter();
+        let (_, data_len) = iter.size_hint();
+        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
+
+        let mut offsets =
+            MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
+        offsets.push(T::Offset::usize_as(0));
+
+        let mut values = MutableBuffer::new(0);
+        for s in iter {
+            let s: &[u8] = s.as_ref().as_ref();
+            values.extend_from_slice(s);
+            offsets.push(T::Offset::usize_as(values.len()));
+        }
+
+        T::Offset::from_usize(values.len()).expect("offset overflow");
+        let offsets = Buffer::from(offsets);
+
+        // Safety: valid by construction
+        let value_offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
+
+        Self {
+            data_type: T::DATA_TYPE,
+            value_data: values.into(),
+            value_offsets,
+            nulls: None,
+        }
+    }
+
     /// Deconstruct this array into its constituent parts
     pub fn into_parts(self) -> (OffsetBuffer<T::Offset>, Buffer, Option<NullBuffer>) {
         (self.value_offsets, self.value_data, self.nulls)
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index f9a3a5fbd0..4c40e8b90c 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -17,8 +17,6 @@
 
 use crate::types::GenericStringType;
 use crate::{GenericBinaryArray, GenericByteArray, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::MutableBuffer;
-use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 
 /// A [`GenericByteArray`] for storing `str`
@@ -40,42 +38,6 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
         self.value(i).chars().count()
     }
 
-    /// Creates a [`GenericStringArray`] based on an iterator of values without nulls
-    pub fn from_iter_values<Ptr, I>(iter: I) -> Self
-    where
-        Ptr: AsRef<str>,
-        I: IntoIterator<Item = Ptr>,
-    {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let mut offsets =
-            MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
-        let mut values = MutableBuffer::new(0);
-
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        for i in iter {
-            let s = i.as_ref();
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            offsets.push(length_so_far);
-            values.extend_from_slice(s.as_bytes());
-        }
-
-        // iterator size hint may not be correct so compute the actual number of offsets
-        assert!(!offsets.is_empty()); // wrote at least one
-        let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;
-
-        let array_data = ArrayData::builder(Self::DATA_TYPE)
-            .len(actual_len)
-            .add_buffer(offsets.into())
-            .add_buffer(values.into());
-        let array_data = unsafe { array_data.build_unchecked() };
-        Self::from(array_data)
-    }
-
     /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
     pub fn take_iter<'a>(
         &'a self,
@@ -210,6 +172,7 @@ mod tests {
     use crate::types::UInt8Type;
     use crate::Array;
     use arrow_buffer::Buffer;
+    use arrow_data::ArrayData;
     use arrow_schema::Field;
     use std::sync::Arc;
 

Reply via email to