This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 087f34b70e9 Refine documentation for `unary_mut` and `binary_mut` 
(#5798)
087f34b70e9 is described below

commit 087f34b70e97ee85e1a54b3c45c5ed814f500b0a
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jun 6 07:28:55 2024 -0400

    Refine documentation for `unary_mut` and `binary_mut` (#5798)
    
    * Refine documentation for unary_mut and binary_mut,
    
    * Update arrow-array/src/array/primitive_array.rs
    
    * Update binary_mut example to show different array types
---
 arrow-arith/src/arity.rs                 | 111 ++++++++++++++++++++------
 arrow-array/src/array/primitive_array.rs | 133 +++++++++++++++++++++++--------
 arrow-array/src/types.rs                 |   6 +-
 arrow-buffer/src/native.rs               |  11 ++-
 4 files changed, 194 insertions(+), 67 deletions(-)

diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs
index 99e17c7bdaf..17c1b0dbccf 100644
--- a/arrow-arith/src/arity.rs
+++ b/arrow-arith/src/arity.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines kernels suitable to perform operations to primitive arrays.
+//! Kernels for operating on [`PrimitiveArray`]s
 
 use arrow_array::builder::BufferBuilder;
 use arrow_array::types::ArrowDictionaryKeyType;
@@ -162,18 +162,38 @@ where
     }
 }
 
+/// Applies a binary infallible function to two [`PrimitiveArray`]s,
+/// producing a new [`PrimitiveArray`]
+///
+/// # Details
+///
 /// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in 
`0..len`, collecting
-/// the results in a [`PrimitiveArray`]. If any index is null in either `a` or 
`b`, the
+/// the results in a [`PrimitiveArray`].
+///
+/// If any index is null in either `a` or `b`, the
 /// corresponding index in the result will also be null
 ///
-/// Like [`unary`] the provided function is evaluated for every index, 
ignoring validity. This
-/// is beneficial when the cost of the operation is low compared to the cost 
of branching, and
-/// especially when the operation can be vectorised, however, requires `op` to 
be infallible
-/// for all possible values of its inputs
+/// Like [`unary`], the `op` is evaluated for every element in the two arrays,
+/// including those elements which are NULL. This is beneficial when the cost of
+/// the operation is low compared to the cost of branching, and especially when
+/// the operation can be vectorised, however, requires `op` to be infallible 
for
+/// all possible values of its inputs
 ///
-/// # Error
+/// # Errors
+///
+/// * if the arrays have different lengths.
 ///
-/// This function gives error if the arrays have different lengths
+/// # Example
+/// ```
+/// # use arrow_arith::arity::binary;
+/// # use arrow_array::{Float32Array, Int32Array};
+/// # use arrow_array::types::Int32Type;
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8), Some(7.2)]);
+/// let b = Int32Array::from(vec![1, 2, 4, 9]);
+/// // compute int(a) + b for each element
+/// let c = binary(&a, &b, |a, b| a as i32 + b).unwrap();
+/// assert_eq!(c, Int32Array::from(vec![Some(6), None, Some(10), Some(16)]));
+/// ```
 pub fn binary<A, B, F, O>(
     a: &PrimitiveArray<A>,
     b: &PrimitiveArray<B>,
@@ -207,23 +227,70 @@ where
     Ok(PrimitiveArray::new(buffer.into(), nulls))
 }
 
-/// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in 
`0..len`, mutating
-/// the mutable [`PrimitiveArray`] `a`. If any index is null in either `a` or 
`b`, the
-/// corresponding index in the result will also be null.
+/// Applies a binary and infallible function to values in two arrays, replacing
+/// the values in the first array in place.
+///
+/// # Details
+///
+/// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in
+/// `0..len`, modifying the [`PrimitiveArray`] `a` in place, if possible.
+///
+/// If any index is null in either `a` or `b`, the corresponding index in the
+/// result will also be null.
 ///
-/// Mutable primitive array means that the buffer is not shared with other 
arrays.
-/// As a result, this mutates the buffer directly without allocating new 
buffer.
+/// # Buffer Reuse
+///
+/// If the underlying buffers in `a` are not shared with other arrays, mutates
+/// the underlying buffer in place, without allocating.
+///
+/// If the underlying buffers in `a` are shared, returns `Err(a)`
 ///
 /// Like [`unary`] the provided function is evaluated for every index, 
ignoring validity. This
 /// is beneficial when the cost of the operation is low compared to the cost 
of branching, and
 /// especially when the operation can be vectorised, however, requires `op` to 
be infallible
 /// for all possible values of its inputs
 ///
-/// # Error
+/// # Errors
+///
+/// * If the arrays have different lengths
+/// * If the array is not mutable (see "Buffer Reuse")
+///
+/// # See Also
+///
+/// * Documentation on [`PrimitiveArray::unary_mut`] for operating on 
[`ArrayRef`].
 ///
-/// This function gives error if the arrays have different lengths.
-/// This function gives error of original [`PrimitiveArray`] `a` if it is not 
a mutable
-/// primitive array.
+/// # Example
+/// ```
+/// # use arrow_arith::arity::binary_mut;
+/// # use arrow_array::{Float32Array, Int32Array};
+/// # use arrow_array::types::Int32Type;
+/// // compute a + b for each element
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8)]);
+/// let b = Int32Array::from(vec![Some(1), None, Some(2)]);
+/// // compute a + b, updating the value in a in place if possible
+/// let a = binary_mut(a, &b, |a, b| a + b as f32).unwrap().unwrap();
+/// // a is updated in place
+/// assert_eq!(a, Float32Array::from(vec![Some(6.1), None, Some(8.8)]));
+/// ```
+///
+/// # Example with shared buffers
+/// ```
+/// # use arrow_arith::arity::binary_mut;
+/// # use arrow_array::Float32Array;
+/// # use arrow_array::types::Int32Type;
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8)]);
+/// let b = Float32Array::from(vec![Some(1.0f32), None, Some(2.0)]);
+/// // a_cloned shares the buffer with a
+/// let a_cloned = a.clone();
+/// // try to update a in place, but it is shared. Returns Err(a)
+/// let a = binary_mut(a, &b, |a, b| a + b).unwrap_err();
+/// assert_eq!(a_cloned, a);
+/// // drop shared reference
+/// drop(a_cloned);
+/// // now a is not shared, so we can update it in place
+/// let a = binary_mut(a, &b, |a, b| a + b).unwrap().unwrap();
+/// assert_eq!(a, Float32Array::from(vec![Some(6.1), None, Some(8.8)]));
+/// ```
 pub fn binary_mut<T, U, F>(
     a: PrimitiveArray<T>,
     b: &PrimitiveArray<U>,
@@ -319,15 +386,7 @@ where
 ///
 /// Like [`try_unary`] the function is only evaluated for non-null indices
 ///
-/// Mutable primitive array means that the buffer is not shared with other 
arrays.
-/// As a result, this mutates the buffer directly without allocating new 
buffer.
-///
-/// # Error
-///
-/// Return an error if the arrays have different lengths or
-/// the operation is under erroneous.
-/// This function gives error of original [`PrimitiveArray`] `a` if it is not 
a mutable
-/// primitive array.
+/// See [`binary_mut`] for errors and buffer reuse information
 pub fn try_binary_mut<T, F>(
     a: PrimitiveArray<T>,
     b: &PrimitiveArray<T>,
diff --git a/arrow-array/src/array/primitive_array.rs 
b/arrow-array/src/array/primitive_array.rs
index 919a1010116..917822d4248 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -419,7 +419,7 @@ pub type Decimal256Array = PrimitiveArray<Decimal256Type>;
 
 pub use crate::types::ArrowPrimitiveType;
 
-/// An array of [primitive 
values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
+/// An array of primitive values, of type [`ArrowPrimitiveType`]
 ///
 /// # Example: From a Vec
 ///
@@ -480,6 +480,19 @@ pub use crate::types::ArrowPrimitiveType;
 /// assert_eq!(array.values(), &[1, 0, 2]);
 /// assert!(array.is_null(1));
 /// ```
+///
+/// # Example: Get a `PrimitiveArray` from an [`ArrayRef`]
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_array::{Array, cast::AsArray, ArrayRef, Float32Array, 
PrimitiveArray};
+/// # use arrow_array::types::{Float32Type};
+/// # use arrow_schema::DataType;
+/// # let array: ArrayRef =  Arc::new(Float32Array::from(vec![1.2, 2.3]));
+/// // will panic if the array is not a Float32Array
+/// assert_eq!(&DataType::Float32, array.data_type());
+/// let f32_array: Float32Array  = array.as_primitive().clone();
+/// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3]));
+/// ```
 pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     data_type: DataType,
     /// Values data
@@ -732,22 +745,34 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         PrimitiveArray::from(unsafe { d.build_unchecked() })
     }
 
-    /// Applies an unary and infallible function to a primitive array.
-    /// This is the fastest way to perform an operation on a primitive array 
when
-    /// the benefits of a vectorized operation outweigh the cost of branching 
nulls and non-nulls.
+    /// Applies a unary infallible function to a primitive array, producing a
+    /// new array of potentially different type.
+    ///
+    /// This is the fastest way to perform an operation on a primitive array
+    /// when the benefits of a vectorized operation outweigh the cost of
+    /// branching nulls and non-nulls.
     ///
-    /// # Implementation
+    /// See also
+    /// * [`Self::unary_mut`] for in place modification.
+    /// * [`Self::try_unary`] for fallible operations.
+    /// * [`arrow::compute::binary`] for binary operations
+    ///
+    /// [`arrow::compute::binary`]: 
https://docs.rs/arrow/latest/arrow/compute/fn.binary.html
+    /// # Null Handling
+    ///
+    /// Applies the function for all values, including those on null slots. 
This
+    /// will often allow the compiler to generate faster vectorized code, but
+    /// requires that the operation must be infallible (not error/panic) for 
any
+    /// value of the corresponding type or this function may panic.
     ///
-    /// This will apply the function for all values, including those on null 
slots.
-    /// This implies that the operation must be infallible for any value of 
the corresponding type
-    /// or this function may panic.
     /// # Example
     /// ```rust
-    /// # use arrow_array::{Int32Array, types::Int32Type};
+    /// # use arrow_array::{Int32Array, Float32Array, types::Int32Type};
     /// # fn main() {
     /// let array = Int32Array::from(vec![Some(5), Some(7), None]);
-    /// let c = array.unary(|x| x * 2 + 1);
-    /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
+    /// // Create a new array with the value of applying sqrt
+    /// let c = array.unary(|x| f32::sqrt(x as f32));
+    /// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), 
None]));
     /// # }
     /// ```
     pub fn unary<F, O>(&self, op: F) -> PrimitiveArray<O>
@@ -766,24 +791,50 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         PrimitiveArray::new(buffer.into(), nulls)
     }
 
-    /// Applies an unary and infallible function to a mutable primitive array.
-    /// Mutable primitive array means that the buffer is not shared with other 
arrays.
-    /// As a result, this mutates the buffer directly without allocating new 
buffer.
+    /// Applies a unary and infallible function to the array in place if 
possible.
+    ///
+    /// # Buffer Reuse
+    ///
+    /// If the underlying buffers are not shared with other arrays,  mutates 
the
+    /// underlying buffer in place, without allocating.
+    ///
+    /// If the underlying buffer is shared, returns Err(self)
     ///
-    /// # Implementation
+    /// # Null Handling
+    ///
+    /// See [`Self::unary`] for more information on null handling.
     ///
-    /// This will apply the function for all values, including those on null 
slots.
-    /// This implies that the operation must be infallible for any value of 
the corresponding type
-    /// or this function may panic.
     /// # Example
+    ///
     /// ```rust
     /// # use arrow_array::{Int32Array, types::Int32Type};
-    /// # fn main() {
     /// let array = Int32Array::from(vec![Some(5), Some(7), None]);
+    /// // Apply x*2+1 to the data in place, no allocations
     /// let c = array.unary_mut(|x| x * 2 + 1).unwrap();
     /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
-    /// # }
     /// ```
+    ///
+    /// # Example: modify [`ArrayRef`] in place, if not shared
+    ///
+    /// It is also possible to modify an [`ArrayRef`] if there are no other
+    /// references to the underlying buffer.
+    ///
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Int32Array, 
PrimitiveArray, types::Int32Type};
+    /// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), 
Some(7), None]));
+    /// // Convert to Int32Array (panics if array.data_type is not Int32)
+    /// let a = array.as_primitive::<Int32Type>().clone();
+    /// // Try to apply x*2+1 to the data in place, fails because array is 
still shared
+    /// a.unary_mut(|x| x * 2 + 1).unwrap_err();
+    /// // Try again, this time dropping the last remaining reference
+    /// let a = array.as_primitive::<Int32Type>().clone();
+    /// drop(array);
+    /// // Now we can apply the operation in place
+    /// let c = a.unary_mut(|x| x * 2 + 1).unwrap();
+    /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
+    /// ```
+
     pub fn unary_mut<F>(self, op: F) -> Result<PrimitiveArray<T>, 
PrimitiveArray<T>>
     where
         F: Fn(T::Native) -> T::Native,
@@ -796,11 +847,12 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         Ok(builder.finish())
     }
 
-    /// Applies a unary and fallible function to all valid values in a 
primitive array
+    /// Applies a unary fallible function to all valid values in a primitive
+    /// array, producing a new array of potentially different type.
     ///
-    /// This is unlike [`Self::unary`] which will apply an infallible function 
to all rows
-    /// regardless of validity, in many cases this will be significantly 
faster and should
-    /// be preferred if `op` is infallible.
+    /// Applies `op` to only rows that are valid, which is often significantly
+    /// slower than [`Self::unary`], which should be preferred if `op` is
+    /// infallible.
     ///
     /// Note: LLVM is currently unable to effectively vectorize fallible 
operations
     pub fn try_unary<F, O, E>(&self, op: F) -> Result<PrimitiveArray<O>, E>
@@ -829,13 +881,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         Ok(PrimitiveArray::new(values, nulls))
     }
 
-    /// Applies an unary and fallible function to all valid values in a 
mutable primitive array.
-    /// Mutable primitive array means that the buffer is not shared with other 
arrays.
-    /// As a result, this mutates the buffer directly without allocating new 
buffer.
+    /// Applies a unary fallible function to all valid values in a mutable
+    /// primitive array.
+    ///
+    /// # Null Handling
+    ///
+    /// See [`Self::try_unary`] for more information on null handling.
+    ///
+    /// # Buffer Reuse
     ///
-    /// This is unlike [`Self::unary_mut`] which will apply an infallible 
function to all rows
-    /// regardless of validity, in many cases this will be significantly 
faster and should
-    /// be preferred if `op` is infallible.
+    /// See [`Self::unary_mut`] for more information on buffer reuse.
     ///
     /// This returns an `Err` when the input array is shared buffer with other
     /// array. In the case, returned `Err` wraps input array. If the function
@@ -870,9 +925,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
 
     /// Applies a unary and nullable function to all valid values in a 
primitive array
     ///
-    /// This is unlike [`Self::unary`] which will apply an infallible function 
to all rows
-    /// regardless of validity, in many cases this will be significantly 
faster and should
-    /// be preferred if `op` is infallible.
+    /// Applies `op` to only rows that are valid, which is often significantly
+    /// slower than [`Self::unary`], which should be preferred if `op` is
+    /// infallible.
     ///
     /// Note: LLVM is currently unable to effectively vectorize fallible 
operations
     pub fn unary_opt<F, O>(&self, op: F) -> PrimitiveArray<O>
@@ -915,8 +970,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         PrimitiveArray::new(values, Some(nulls))
     }
 
-    /// Returns `PrimitiveBuilder` of this primitive array for mutating its 
values if the underlying
-    /// data buffer is not shared by others.
+    /// Returns a `PrimitiveBuilder` for this array, suitable for mutating 
values
+    /// in place.
+    ///
+    /// # Buffer Reuse
+    ///
+    /// If the underlying data buffer has no other outstanding references, the
+    /// buffer is used without copying.
+    ///
+    /// If the underlying data buffer does have outstanding references, returns
+    /// `Err(self)`
     pub fn into_builder(self) -> Result<PrimitiveBuilder<T>, Self> {
         let len = self.len();
         let data = self.into_data();
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 462776005f9..ac77fd45d03 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -47,9 +47,11 @@ impl BooleanType {
     pub const DATA_TYPE: DataType = DataType::Boolean;
 }
 
-/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with 
the
-/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that 
implement [`ArrowNativeType`].
+/// Trait for [primitive values], bridging the dynamic-typed nature of Arrow
+/// (via [`DataType`]) with the static-typed nature of rust types
+/// ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
 ///
+/// [primitive values]: 
https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout
 /// [`ArrowNativeType`]: arrow_buffer::ArrowNativeType
 pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static {
     /// Corresponding Rust native type for the primitive type.
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index e05c1311ff3..c563f73cf5b 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -22,11 +22,14 @@ mod private {
     pub trait Sealed {}
 }
 
-/// Trait expressing a Rust type that has the same in-memory representation
-/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow 
is represented in bits).
+/// Trait expressing a Rust type that has the same in-memory representation as
+/// Arrow.
 ///
-/// In little endian machines, types that implement [`ArrowNativeType`] can be 
memcopied to arrow buffers
-/// as is.
+/// This includes `i16`, `f32`, but excludes `bool` (which in arrow is
+/// represented in bits).
+///
+/// In little endian machines, types that implement [`ArrowNativeType`] can be
+/// memcopied to arrow buffers as is.
 ///
 /// # Transmute Safety
 ///

Reply via email to