This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6fb59d0843 Improve `GenericStringBuilder` documentation (#6372)
6fb59d0843 is described below
commit 6fb59d084359a41b9b8d234c2b299225c91b9759
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Sep 11 11:30:19 2024 -0400
Improve `GenericStringBuilder` documentation (#6372)
* Improve GenericStringBuilder documentation
* Update arrow-array/src/builder/generic_bytes_builder.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---------
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---
arrow-array/src/builder/generic_bytes_builder.rs | 57 +++++++++++++++++++-----
1 file changed, 47 insertions(+), 10 deletions(-)
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs
b/arrow-array/src/builder/generic_bytes_builder.rs
index 516f45be34..e85a2fed01 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -60,8 +60,9 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
/// Creates a new [`GenericByteBuilder`] from buffers.
///
/// # Safety
- /// This doesn't verify buffer contents as it assumes the buffers are from
existing and
- /// valid [`GenericByteArray`].
+ ///
+ /// This doesn't verify buffer contents as it assumes the buffers are from
+ /// existing and valid [`GenericByteArray`].
pub unsafe fn new_from_buffer(
offsets_buffer: MutableBuffer,
value_buffer: MutableBuffer,
@@ -88,9 +89,19 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
/// Appends a value into the builder.
///
+ /// See the [GenericStringBuilder] documentation for examples of
+ /// incrementally building string values with multiple `write!` calls.
+ ///
/// # Panics
///
- /// Panics if the resulting length of [`Self::values_slice`] would exceed
`T::Offset::MAX`
+ /// Panics if the resulting length of [`Self::values_slice`] would exceed
+ /// `T::Offset::MAX` bytes.
+ ///
+ /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
+ /// where the total length of all values exceeds 2GB
+ ///
+ /// [`StringArray`]: crate::StringArray
+ /// [`BinaryArray`]: crate::BinaryArray
#[inline]
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
self.value_builder.append_slice(value.as_ref().as_ref());
@@ -99,6 +110,11 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
}
/// Append an `Option` value into the builder.
+ ///
+ /// - A `None` value will append a null value.
+ /// - A `Some` value will append the value.
+ ///
+ /// See [`Self::append_value`] for more panic information.
#[inline]
pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
match value {
@@ -227,30 +243,51 @@ impl<T: ByteArrayType, V: AsRef<T::Native>>
Extend<Option<V>> for GenericByteBui
/// Values can be appended using [`GenericByteBuilder::append_value`], and
nulls with
/// [`GenericByteBuilder::append_null`].
///
-/// Additionally, implements [`std::fmt::Write`] with any written data
included in the next
-/// appended value. This allows use with [`std::fmt::Display`] without
intermediate allocations
+/// This builder also implements [`std::fmt::Write`] with any written data
+/// included in the next appended value. This allows using
[`std::fmt::Display`]
+/// with standard Rust idioms like `write!` and `writeln!` to write data
+/// directly to the builder without intermediate allocations.
+///
+/// # Example writing strings with `append_value`
+/// ```
+/// # use arrow_array::builder::GenericStringBuilder;
+/// let mut builder = GenericStringBuilder::<i32>::new();
+///
+/// // Write one string value
+/// builder.append_value("foobarbaz");
+///
+/// // Write a second string
+/// builder.append_value("v2");
+///
+/// let array = builder.finish();
+/// assert_eq!(array.value(0), "foobarbaz");
+/// assert_eq!(array.value(1), "v2");
+/// ```
+///
+/// # Example incrementally writing strings with `std::fmt::Write`
///
-/// # Example
/// ```
/// # use std::fmt::Write;
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
-/// // Write data
+/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
-///
-/// // Finish value
+/// // The next call to append_value finishes the current string
+/// // including all previously written strings.
/// builder.append_value("baz");
///
-/// // Write second value
+/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
+/// // finish the value by calling append_value with an empty string
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
+///
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
impl<O: OffsetSizeTrait> Write for GenericStringBuilder<O> {