This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 378a9fcc9 Update arrow rustdocs (#4071) (#4197)
378a9fcc9 is described below
commit 378a9fcc9ee31fff4a9a13f5de5a326dc449541e
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu May 11 12:24:35 2023 +0100
Update arrow rustdocs (#4071) (#4197)
* Update docs (#4071)
* Review feedback
---
arrow-array/src/array/binary_array.rs | 9 +-
arrow-array/src/array/boolean_array.rs | 2 +-
arrow-array/src/array/byte_array.rs | 2 +-
arrow-array/src/array/dictionary_array.rs | 39 +++---
arrow-array/src/array/fixed_size_binary_array.rs | 2 +-
arrow-array/src/array/fixed_size_list_array.rs | 6 +-
arrow-array/src/array/list_array.rs | 24 ++--
arrow-array/src/array/map_array.rs | 3 +-
arrow-array/src/array/mod.rs | 5 +-
arrow-array/src/array/null_array.rs | 2 +-
arrow-array/src/array/primitive_array.rs | 101 +++++++++-----
arrow-array/src/array/run_array.rs | 17 +--
arrow-array/src/array/string_array.rs | 11 +-
arrow-array/src/array/struct_array.rs | 4 +-
arrow-array/src/array/union_array.rs | 2 +-
arrow-array/src/builder/boolean_buffer_builder.rs | 2 +-
arrow-array/src/builder/boolean_builder.rs | 2 +-
.../src/builder/fixed_size_binary_builder.rs | 8 +-
arrow-array/src/builder/fixed_size_list_builder.rs | 2 +-
.../src/builder/generic_byte_run_builder.rs | 13 +-
arrow-array/src/builder/generic_bytes_builder.rs | 2 +-
.../builder/generic_bytes_dictionary_builder.rs | 19 +--
arrow-array/src/builder/generic_list_builder.rs | 2 +-
arrow-array/src/builder/map_builder.rs | 5 +-
arrow-array/src/builder/mod.rs | 28 ++--
arrow-array/src/builder/primitive_builder.rs | 2 +-
.../src/builder/primitive_dictionary_builder.rs | 4 +-
arrow-array/src/builder/primitive_run_builder.rs | 2 +-
arrow-array/src/builder/struct_builder.rs | 2 +-
arrow-array/src/builder/union_builder.rs | 2 +-
arrow-array/src/lib.rs | 148 ++++++++++++---------
arrow-buffer/src/alloc/mod.rs | 3 +-
arrow-buffer/src/buffer/mod.rs | 3 +-
arrow-buffer/src/buffer/mutable.rs | 2 +-
arrow-buffer/src/buffer/null.rs | 7 +
arrow-buffer/src/buffer/scalar.rs | 20 ++-
arrow-buffer/src/util/bit_iterator.rs | 2 +
arrow/src/lib.rs | 17 +--
38 files changed, 283 insertions(+), 243 deletions(-)
diff --git a/arrow-array/src/array/binary_array.rs
b/arrow-array/src/array/binary_array.rs
index 3b13a513f..a4d64040c 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -23,8 +23,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
-/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
-/// binary data.
+/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing binary data
pub type GenericBinaryArray<OffsetSize> =
GenericByteArray<GenericBinaryType<OffsetSize>>;
impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
@@ -218,7 +217,8 @@ where
}
}
-/// An array where each element contains 0 or more bytes.
+/// An array of `[u8]` using `i32` offsets
+///
/// The byte length of each element is represented by an i32.
///
/// # Examples
@@ -258,8 +258,7 @@ where
///
pub type BinaryArray = GenericBinaryArray<i32>;
-/// An array where each element contains 0 or more bytes.
-/// The byte length of each element is represented by an i64.
+/// An array of `[u8]` using `i64` offsets
///
/// # Examples
///
diff --git a/arrow-array/src/array/boolean_array.rs
b/arrow-array/src/array/boolean_array.rs
index d03f0fd04..9ecdb2c5d 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -25,7 +25,7 @@ use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;
-/// Array of bools
+/// An array of [boolean
values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
///
/// # Example
///
diff --git a/arrow-array/src/array/byte_array.rs
b/arrow-array/src/array/byte_array.rs
index 12f9aab67..629ffd22c 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -28,7 +28,7 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-/// Generic struct for variable-size byte arrays
+/// An array of [variable length byte
arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout)
///
/// See [`StringArray`] and [`LargeStringArray`] for storing utf8 encoded
string data
///
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 75fd4c6d0..a319a836a 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -30,8 +30,7 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+/// A dictionary array indexed by `i8`
///
/// # Example: Using `collect`
/// ```
@@ -44,8 +43,8 @@ use std::sync::Arc;
/// assert_eq!(array.values(), &values);
/// ```
pub type Int8DictionaryArray = DictionaryArray<Int8Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `i16`
///
/// # Example: Using `collect`
/// ```
@@ -58,8 +57,8 @@ pub type Int8DictionaryArray = DictionaryArray<Int8Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type Int16DictionaryArray = DictionaryArray<Int16Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `i32`
///
/// # Example: Using `collect`
/// ```
@@ -72,8 +71,8 @@ pub type Int16DictionaryArray = DictionaryArray<Int16Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type Int32DictionaryArray = DictionaryArray<Int32Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `i64`
///
/// # Example: Using `collect`
/// ```
@@ -86,8 +85,8 @@ pub type Int32DictionaryArray = DictionaryArray<Int32Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type Int64DictionaryArray = DictionaryArray<Int64Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `u8`
///
/// # Example: Using `collect`
/// ```
@@ -100,8 +99,8 @@ pub type Int64DictionaryArray = DictionaryArray<Int64Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `u16`
///
/// # Example: Using `collect`
/// ```
@@ -114,8 +113,8 @@ pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `u32`
///
/// # Example: Using `collect`
/// ```
@@ -128,8 +127,8 @@ pub type UInt16DictionaryArray =
DictionaryArray<UInt16Type>;
/// assert_eq!(array.values(), &values);
/// ```
pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;
-///
-/// A dictionary array where each element is a single value indexed by an
integer key.
+
+/// A dictionary array indexed by `u64`
///
/// # Example: Using `collect`
/// ```
@@ -143,7 +142,8 @@ pub type UInt32DictionaryArray =
DictionaryArray<UInt32Type>;
/// ```
pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
-/// A dictionary array where each element is a single value indexed by an
integer key.
+/// An array of [dictionary encoded
values](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout)
+///
/// This is mostly used to represent strings or a limited set of primitive
types as integers,
/// for example when doing NLP analysis or representing chromosomes by name.
///
@@ -695,8 +695,9 @@ impl<T: ArrowDictionaryKeyType> std::fmt::Debug for
DictionaryArray<T> {
}
}
-/// A strongly-typed wrapper around a [`DictionaryArray`] that implements
[`ArrayAccessor`]
-/// allowing fast access to its elements
+/// A [`DictionaryArray`] typed on its child values array
+///
+/// Implements [`ArrayAccessor`] allowing fast access to its elements
///
/// ```
/// use arrow_array::{DictionaryArray, StringArray, types::Int32Type};
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs
b/arrow-array/src/array/fixed_size_binary_array.rs
index 08ce76c06..083d71cd9 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -25,7 +25,7 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-/// An array where each element is a fixed-size sequence of bytes.
+/// An array of [fixed size binary
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
///
/// # Examples
///
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index 86adafa06..18fa9df92 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -24,8 +24,7 @@ use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;
-/// A list array where each element is a fixed-size sequence of values with
the same
-/// type whose maximum length is represented by a i32.
+/// An array of [fixed size
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout)
///
/// # Example
///
@@ -59,9 +58,6 @@ use std::sync::Arc;
/// assert_eq!( &[3, 4, 5],
list1.as_any().downcast_ref::<Int32Array>().unwrap().values());
/// assert_eq!( &[6, 7, 8],
list2.as_any().downcast_ref::<Int32Array>().unwrap().values());
/// ```
-///
-/// For non generic lists, you may wish to consider using
-/// [crate::array::FixedSizeBinaryArray]
#[derive(Clone)]
pub struct FixedSizeListArray {
data_type: DataType, // Must be DataType::FixedSizeList(value_length)
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index f4e5b4b79..f4816a61e 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -28,7 +28,15 @@ use num::Integer;
use std::any::Any;
use std::sync::Arc;
-/// trait declaring an offset size, relevant for i32 vs i64 array types.
+/// A type that can be used within a variable-size array to encode offset
information
+///
+/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`],
[`LargeBinaryArray`],
+/// [`StringArray`] and [`LargeStringArray`]
+///
+/// [`BinaryArray`]: crate::array::BinaryArray
+/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
+/// [`StringArray`]: crate::array::StringArray
+/// [`LargeStringArray`]: crate::array::LargeStringArray
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
/// True for 64 bit offset size and false for 32 bit offset size
const IS_LARGE: bool;
@@ -46,12 +54,9 @@ impl OffsetSizeTrait for i64 {
const PREFIX: &'static str = "Large";
}
-/// Generic struct for a variable-size list array.
+/// An array of [variable length
arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout)
///
-/// Columnar format in Apache Arrow:
-///
<https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout>
-///
-/// For non generic lists, you may wish to consider using [`ListArray`] or
[`LargeListArray`]`
+/// See [`ListArray`] and [`LargeListArray`]
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
data_type: DataType,
nulls: Option<NullBuffer>,
@@ -447,8 +452,7 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for
GenericListArray<OffsetSiz
}
}
-/// A list array where each element is a variable-sized sequence of values
with the same
-/// type whose memory offsets between elements are represented by a i32.
+/// An array of variable size lists, storing offsets as `i32`.
///
/// # Example
///
@@ -475,8 +479,8 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for
GenericListArray<OffsetSiz
/// ```
pub type ListArray = GenericListArray<i32>;
-/// A list array where each element is a variable-sized sequence of values
with the same
-/// type whose memory offsets between elements are represented by a i64.
+/// An array of variable size lists, storing offsets as `i64`.
+///
/// # Example
///
/// ```
diff --git a/arrow-array/src/array/map_array.rs
b/arrow-array/src/array/map_array.rs
index c53e452a6..cf0978f05 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -23,7 +23,8 @@ use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
use std::sync::Arc;
-/// A nested array type where each record is a key-value map.
+/// An array of key-value maps
+///
/// Keys should always be non-null, but values can be null.
///
/// [MapArray] is physically a [crate::array::ListArray] that has a
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index e6fd6828b..931277064 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -67,8 +67,7 @@ pub use union_array::*;
mod run_array;
pub use run_array::*;
-/// Trait for dealing with different types of array at runtime when the type
of the
-/// array is not known in advance.
+/// An array in the [arrow columnar
format](https://arrow.apache.org/docs/format/Columnar.html)
pub trait Array: std::fmt::Debug + Send + Sync {
/// Returns the array as [`Any`](std::any::Any) so that it can be
/// downcasted to a specific implementation.
@@ -237,7 +236,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn get_array_memory_size(&self) -> usize;
}
-/// A reference-counted reference to a generic `Array`.
+/// A reference-counted reference to a generic `Array`
pub type ArrayRef = Arc<dyn Array>;
/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
diff --git a/arrow-array/src/array/null_array.rs
b/arrow-array/src/array/null_array.rs
index c7f61d91d..7fdd99a39 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -24,7 +24,7 @@ use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;
-/// An Array where all elements are nulls
+/// An array of [null
values](https://arrow.apache.org/docs/format/Columnar.html#null-layout)
///
/// A `NullArray` is a simplified array where all values are null.
///
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 8c8562b5b..35202a4c7 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -34,6 +34,7 @@ use half::f16;
use std::any::Any;
use std::sync::Arc;
+/// An array of `i8`
///
/// # Example: Using `collect`
/// ```
@@ -41,6 +42,8 @@ use std::sync::Arc;
/// let arr : Int8Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type Int8Array = PrimitiveArray<Int8Type>;
+
+/// An array of `i16`
///
/// # Example: Using `collect`
/// ```
@@ -48,6 +51,8 @@ pub type Int8Array = PrimitiveArray<Int8Type>;
/// let arr : Int16Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type Int16Array = PrimitiveArray<Int16Type>;
+
+/// An array of `i32`
///
/// # Example: Using `collect`
/// ```
@@ -55,6 +60,8 @@ pub type Int16Array = PrimitiveArray<Int16Type>;
/// let arr : Int32Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type Int32Array = PrimitiveArray<Int32Type>;
+
+/// An array of `i64`
///
/// # Example: Using `collect`
/// ```
@@ -62,13 +69,16 @@ pub type Int32Array = PrimitiveArray<Int32Type>;
/// let arr : Int64Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type Int64Array = PrimitiveArray<Int64Type>;
-///
+
+/// An array of `u8`
/// # Example: Using `collect`
/// ```
/// # use arrow_array::UInt8Array;
/// let arr : UInt8Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type UInt8Array = PrimitiveArray<UInt8Type>;
+
+/// An array of `u16`
///
/// # Example: Using `collect`
/// ```
@@ -76,6 +86,8 @@ pub type UInt8Array = PrimitiveArray<UInt8Type>;
/// let arr : UInt16Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type UInt16Array = PrimitiveArray<UInt16Type>;
+
+/// An array of `u32`
///
/// # Example: Using `collect`
/// ```
@@ -83,6 +95,8 @@ pub type UInt16Array = PrimitiveArray<UInt16Type>;
/// let arr : UInt32Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type UInt32Array = PrimitiveArray<UInt32Type>;
+
+/// An array of `u64`
///
/// # Example: Using `collect`
/// ```
@@ -90,6 +104,8 @@ pub type UInt32Array = PrimitiveArray<UInt32Type>;
/// let arr : UInt64Array = [Some(1), Some(2)].into_iter().collect();
/// ```
pub type UInt64Array = PrimitiveArray<UInt64Type>;
+
+/// An array of `f16`
///
/// # Example: Using `collect`
/// ```
@@ -98,6 +114,8 @@ pub type UInt64Array = PrimitiveArray<UInt64Type>;
/// let arr : Float16Array = [Some(f16::from_f64(1.0)),
Some(f16::from_f64(2.0))].into_iter().collect();
/// ```
pub type Float16Array = PrimitiveArray<Float16Type>;
+
+/// An array of `f32`
///
/// # Example: Using `collect`
/// ```
@@ -105,6 +123,8 @@ pub type Float16Array = PrimitiveArray<Float16Type>;
/// let arr : Float32Array = [Some(1.0), Some(2.0)].into_iter().collect();
/// ```
pub type Float32Array = PrimitiveArray<Float32Type>;
+
+/// An array of `f64`
///
/// # Example: Using `collect`
/// ```
@@ -113,8 +133,11 @@ pub type Float32Array = PrimitiveArray<Float32Type>;
/// ```
pub type Float64Array = PrimitiveArray<Float64Type>;
+/// An array of seconds since UNIX epoch stored as `i64`
+///
+/// This type is similar to the [`chrono::DateTime`] type and can hold
+/// values such as `1970-05-09 14:25:11 +01:00`
///
-/// A primitive array where each element is of type [TimestampSecondType].
/// See also [`Timestamp`](arrow_schema::DataType::Timestamp).
///
/// # Example: UTC timestamps post epoch
@@ -157,82 +180,90 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of milliseconds since UNIX epoch stored as `i64`
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of microseconds since UNIX epoch stored as `i64`
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of nanoseconds since UNIX epoch stored as `i64`
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of days since UNIX epoch stored as `i32`
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of milliseconds since UNIX epoch stored as `i64`
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of seconds since midnight stored as `i32`
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of milliseconds since midnight stored as `i32`
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123`
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
microseconds
-/// since midnight.
+
+/// An array of microseconds since midnight stored as `i64`
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123456`
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
nanoseconds
-/// since midnight.
+
+/// An array of nanoseconds since midnight stored as `i64`
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123456789`
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
-/// An array where each element is a “calendar” interval in months.
+/// An array of “calendar” intervals in months
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
-/// An array where each element is a “calendar” interval days and milliseconds.
+
+/// An array of “calendar” intervals in days and milliseconds
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
-/// An array where each element is a “calendar” interval in months, days, and
nanoseconds.
+
+/// An array of “calendar” intervals in months, days, and nanoseconds
pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;
-/// An array where each element is an elapsed time type in seconds.
+/// An array of elapsed durations in seconds
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
-/// An array where each element is an elapsed time type in milliseconds.
+
+/// An array of elapsed durations in milliseconds
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
-/// An array where each element is an elapsed time type in microseconds.
+
+/// An array of elapsed durations in microseconds
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
-/// An array where each element is an elapsed time type in nanoseconds.
+
+/// An array of elapsed durations in nanoseconds
pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;
-/// An array where each element is a 128-bits decimal with precision in [1,
38] and
-/// scale less or equal to 38.
+/// An array of 128-bit fixed point decimals
pub type Decimal128Array = PrimitiveArray<Decimal128Type>;
-/// An array where each element is a 256-bits decimal with precision in [1,
76] and
-/// scale less or equal to 76.
+
+/// An array of 256-bit fixed point decimals
pub type Decimal256Array = PrimitiveArray<Decimal256Type>;
pub use crate::types::ArrowPrimitiveType;
-/// Array whose elements are of primitive types.
+/// An array of [primitive
values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
///
/// # Example: From an iterator of values
///
@@ -890,6 +921,8 @@ impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
}
}
+/// An optional primitive value
+///
/// This struct is used as an adapter when creating `PrimitiveArray` from an
iterator.
/// `FromIterator` for `PrimitiveArray` takes an iterator where the elements
can be `into`
/// this struct. So once implementing `From` or `Into` trait for a type, an
iterator of
diff --git a/arrow-array/src/array/run_array.rs
b/arrow-array/src/array/run_array.rs
index e7e71d384..820d5c9eb 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -30,10 +30,10 @@ use crate::{
Array, ArrayAccessor, ArrayRef, PrimitiveArray,
};
+/// An array of [run-end encoded
values](https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout)
///
-/// A run-end encoding (REE) is a variation of [run-length encoding
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
-///
-/// This encoding is good for representing data containing same values
repeated consecutively.
+/// This encoding is a variation on [run-length encoding
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding)
+/// and is good for representing data containing same values repeated
consecutively.
///
/// [`RunArray`] contains `run_ends` array and `values` array of same length.
/// The `run_ends` array stores the indexes at which the run ends. The
`values` array
@@ -428,7 +428,7 @@ impl<'a, T: RunEndIndexType> FromIterator<&'a str> for
RunArray<T> {
}
///
-/// A [`RunArray`] array where run ends are stored using `i16` data type.
+/// A [`RunArray`] with `i16` run ends
///
/// # Example: Using `collect`
/// ```
@@ -443,7 +443,7 @@ impl<'a, T: RunEndIndexType> FromIterator<&'a str> for
RunArray<T> {
pub type Int16RunArray = RunArray<Int16Type>;
///
-/// A [`RunArray`] array where run ends are stored using `i32` data type.
+/// A [`RunArray`] with `i32` run ends
///
/// # Example: Using `collect`
/// ```
@@ -458,7 +458,7 @@ pub type Int16RunArray = RunArray<Int16Type>;
pub type Int32RunArray = RunArray<Int32Type>;
///
-/// A [`RunArray`] array where run ends are stored using `i64` data type.
+/// A [`RunArray`] with `i64` run ends
///
/// # Example: Using `collect`
/// ```
@@ -472,8 +472,9 @@ pub type Int32RunArray = RunArray<Int32Type>;
/// ```
pub type Int64RunArray = RunArray<Int64Type>;
-/// A strongly-typed wrapper around a [`RunArray`] that implements
[`ArrayAccessor`]
-/// and [`IntoIterator`] allowing fast access to its elements
+/// A [`RunArray`] typed on its child values array
+///
+/// Implements [`ArrayAccessor`] and [`IntoIterator`] allowing fast access to
its elements
///
/// ```
/// use arrow_array::{RunArray, StringArray, types::Int32Type};
diff --git a/arrow-array/src/array/string_array.rs
b/arrow-array/src/array/string_array.rs
index 7c4a37529..d8f1c5da1 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -21,10 +21,7 @@ use arrow_buffer::{bit_util, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
-/// Generic struct for \[Large\]StringArray
-///
-/// See [`StringArray`] and [`LargeStringArray`] for storing
-/// specific string data.
+/// See [`StringArray`] and [`LargeStringArray`] for storing string data
pub type GenericStringArray<OffsetSize> =
GenericByteArray<GenericStringType<OffsetSize>>;
impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
@@ -211,8 +208,7 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for
GenericStringArray<Offse
}
}
-/// An array where each element is a variable-sized sequence of bytes
representing a string
-/// whose maximum length (in bytes) is represented by a i32.
+/// An array of `str` using `i32` offsets
///
/// Example
///
@@ -223,8 +219,7 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for
GenericStringArray<Offse
/// ```
pub type StringArray = GenericStringArray<i32>;
-/// An array where each element is a variable-sized sequence of bytes
representing a string
-/// whose maximum length (in bytes) is represented by a i64.
+/// An array of `str` using `i64` offsets
///
/// Example
///
diff --git a/arrow-array/src/array/struct_array.rs
b/arrow-array/src/array/struct_array.rs
index fac947f14..1a79ebd95 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -22,9 +22,9 @@ use arrow_schema::{ArrowError, DataType, Field, FieldRef,
Fields, SchemaBuilder}
use std::sync::Arc;
use std::{any::Any, ops::Index};
-/// A nested array type where each child (called *field*) is represented by a
separate
-/// array.
+/// An array of
[structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
///
+/// Each child (called *field*) is represented by a separate array.
///
/// # Comparison with [RecordBatch]
///
diff --git a/arrow-array/src/array/union_array.rs
b/arrow-array/src/array/union_array.rs
index 172ae0821..74a5f1efa 100644
--- a/arrow-array/src/array/union_array.rs
+++ b/arrow-array/src/array/union_array.rs
@@ -25,7 +25,7 @@ use arrow_schema::{ArrowError, DataType, Field, UnionFields,
UnionMode};
use std::any::Any;
use std::sync::Arc;
-/// An Array that can represent slots of varying types.
+/// An array of [values of varying
types](https://arrow.apache.org/docs/format/Columnar.html#union-layout)
///
/// Each slot in a [UnionArray] can have a value chosen from a number
/// of types. Each of the possible types are named like the fields of
diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs
b/arrow-array/src/builder/boolean_buffer_builder.rs
index f721504d0..1a3473e19 100644
--- a/arrow-array/src/builder/boolean_buffer_builder.rs
+++ b/arrow-array/src/builder/boolean_buffer_builder.rs
@@ -19,7 +19,7 @@ use arrow_buffer::{bit_util, BooleanBuffer, Buffer,
MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;
-/// A builder for creating a boolean [`Buffer`]
+/// Builder for [`BooleanBuffer`]
#[derive(Debug)]
pub struct BooleanBufferBuilder {
buffer: MutableBuffer,
diff --git a/arrow-array/src/builder/boolean_builder.rs
b/arrow-array/src/builder/boolean_builder.rs
index c7974967a..a35e6f6b9 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -24,7 +24,7 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-/// Array builder for fixed-width primitive types
+/// Builder for [`BooleanArray`]
///
/// # Example
///
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs
b/arrow-array/src/builder/fixed_size_binary_builder.rs
index 695b553f0..a354a1db2 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -24,11 +24,11 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-/// A fixed size binary array builder
+/// Builder for [`FixedSizeBinaryArray`]
/// ```
-/// use arrow_array::builder::FixedSizeBinaryBuilder;
-/// use arrow_array::Array;
-///
+/// # use arrow_array::builder::FixedSizeBinaryBuilder;
+/// # use arrow_array::Array;
+/// #
/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
/// // [b"hello", null, b"arrow"]
/// builder.append_value(b"hello").unwrap();
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs
b/arrow-array/src/builder/fixed_size_list_builder.rs
index 57af76844..ab9fbf5fa 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -24,7 +24,7 @@ use arrow_schema::{DataType, Field};
use std::any::Any;
use std::sync::Arc;
-/// Array builder for [`FixedSizeListArray`]
+/// Builder for [`FixedSizeListArray`]
/// ```
/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array,
Int32Array};
/// let values_builder = Int32Builder::new();
diff --git a/arrow-array/src/builder/generic_byte_run_builder.rs
b/arrow-array/src/builder/generic_byte_run_builder.rs
index 9c26d7be6..97082fe96 100644
--- a/arrow-array/src/builder/generic_byte_run_builder.rs
+++ b/arrow-array/src/builder/generic_byte_run_builder.rs
@@ -30,7 +30,7 @@ use super::{ArrayBuilder, GenericByteBuilder,
PrimitiveBuilder};
use arrow_buffer::ArrowNativeType;
-/// Array builder for [`RunArray`] for String and Binary types.
+/// Builder for [`RunArray`] of
[`GenericByteArray`](crate::array::GenericByteArray)
///
/// # Example:
///
@@ -309,7 +309,7 @@ where
}
}
-/// Array builder for [`RunArray`] that encodes strings ([`Utf8Type`]).
+/// Builder for [`RunArray`] of [`StringArray`](crate::array::StringArray)
///
/// ```
/// // Create a run-end encoded array with run-end indexes data type as `i16`.
@@ -319,7 +319,7 @@ where
/// # use arrow_array::{Int16Array, StringArray};
/// # use arrow_array::types::Int16Type;
/// # use arrow_array::cast::AsArray;
-///
+/// #
/// let mut builder = StringRunBuilder::<Int16Type>::new();
///
/// // The builder builds the dictionary value by value
@@ -342,10 +342,10 @@ where
/// ```
pub type StringRunBuilder<K> = GenericByteRunBuilder<K, Utf8Type>;
-/// Array builder for [`RunArray`] that encodes large strings
([`LargeUtf8Type`]). See [`StringRunBuilder`] for an example.
+/// Builder for [`RunArray`] of
[`LargeStringArray`](crate::array::LargeStringArray)
pub type LargeStringRunBuilder<K> = GenericByteRunBuilder<K, LargeUtf8Type>;
-/// Array builder for [`RunArray`] that encodes binary values([`BinaryType`]).
+/// Builder for [`RunArray`] of [`BinaryArray`](crate::array::BinaryArray)
///
/// ```
/// // Create a run-end encoded array with run-end indexes data type as `i16`.
@@ -378,8 +378,7 @@ pub type LargeStringRunBuilder<K> =
GenericByteRunBuilder<K, LargeUtf8Type>;
/// ```
pub type BinaryRunBuilder<K> = GenericByteRunBuilder<K, BinaryType>;
-/// Array builder for [`RunArray`] that encodes large binary
values([`LargeBinaryType`]).
-/// See documentation of [`BinaryRunBuilder`] for an example.
+/// Builder for [`RunArray`] of
[`LargeBinaryArray`](crate::array::LargeBinaryArray)
pub type LargeBinaryRunBuilder<K> = GenericByteRunBuilder<K, LargeBinaryType>;
#[cfg(test)]
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs
b/arrow-array/src/builder/generic_bytes_builder.rs
index a3598d8bf..1887ab36c 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -25,7 +25,7 @@ use std::any::Any;
use std::fmt::Write;
use std::sync::Arc;
-/// Array builder for [`GenericByteArray`]
+/// Builder for [`GenericByteArray`]
pub struct GenericByteBuilder<T: ByteArrayType> {
value_builder: UInt8BufferBuilder,
offsets_builder: BufferBuilder<T::Offset>,
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index dd9a70b1d..d5c62865f 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -27,7 +27,8 @@ use hashbrown::HashMap;
use std::any::Any;
use std::sync::Arc;
-/// Generic array builder for `DictionaryArray` that stores generic byte
values.
+/// Builder for [`DictionaryArray`] of [`GenericByteArray`]
+///
/// For example to map a set of byte indices to String values. Note that
/// the use of a `HashMap` here will not scale to very large arrays or
/// result in an ordered dictionary.
@@ -338,9 +339,7 @@ fn get_bytes<T: ByteArrayType>(values:
&GenericByteBuilder<T>, idx: usize) -> &[
&values[start_offset..end_offset]
}
-/// Array builder for `DictionaryArray` that stores Strings. For example to
map a set of byte indices
-/// to String values. Note that the use of a `HashMap` here will not scale to
very large
-/// arrays or result in an ordered dictionary.
+/// Builder for [`DictionaryArray`] of
[`StringArray`](crate::array::StringArray)
///
/// ```
/// // Create a dictionary array indexed by bytes whose values are Strings.
@@ -376,15 +375,11 @@ fn get_bytes<T: ByteArrayType>(values:
&GenericByteBuilder<T>, idx: usize) -> &[
pub type StringDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
-/// Array builder for `DictionaryArray` that stores large Strings. For example
to map a set of byte indices
-/// to String values. Note that the use of a `HashMap` here will not scale to
very large
-/// arrays or result in an ordered dictionary.
+/// Builder for [`DictionaryArray`] of
[`LargeStringArray`](crate::array::LargeStringArray)
pub type LargeStringDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
-/// Array builder for `DictionaryArray` that stores binary. For example to map
a set of byte indices
-/// to binary values. Note that the use of a `HashMap` here will not scale to
very large
-/// arrays or result in an ordered dictionary.
+/// Builder for [`DictionaryArray`] of
[`BinaryArray`](crate::array::BinaryArray)
///
/// ```
/// // Create a dictionary array indexed by bytes whose values are binary.
@@ -420,9 +415,7 @@ pub type LargeStringDictionaryBuilder<K> =
pub type BinaryDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
-/// Array builder for `DictionaryArray` that stores large binary. For example
to map a set of byte indices
-/// to binary values. Note that the use of a `HashMap` here will not scale to
very large
-/// arrays or result in an ordered dictionary.
+/// Builder for [`DictionaryArray`] of
[`LargeBinaryArray`](crate::array::LargeBinaryArray)
pub type LargeBinaryDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
diff --git a/arrow-array/src/builder/generic_list_builder.rs
b/arrow-array/src/builder/generic_list_builder.rs
index b6d070798..054c87187 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -24,7 +24,7 @@ use arrow_schema::Field;
use std::any::Any;
use std::sync::Arc;
-/// Array builder for [`GenericListArray`]s.
+/// Builder for [`GenericListArray`]
///
/// Use [`ListBuilder`] to build [`ListArray`]s and [`LargeListBuilder`] to
build [`LargeListArray`]s.
///
diff --git a/arrow-array/src/builder/map_builder.rs
b/arrow-array/src/builder/map_builder.rs
index db85465c8..b73e65b11 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -24,7 +24,8 @@ use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
use std::sync::Arc;
-/// Creates a new `MapBuilder`
+/// Builder for [`MapArray`]
+///
/// ```
/// # use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
/// # use arrow_array::{Int32Array, StringArray};
@@ -62,7 +63,7 @@ pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
value_builder: V,
}
-/// Contains details of the mapping
+/// The [`Field`] names for a [`MapArray`]
#[derive(Debug, Clone)]
pub struct MapFieldNames {
/// [`Field`] name for map entries
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index 081f4d5f4..c4f581fbf 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! Defines builders that can be used to safely build arrays
+//! Defines push-based APIs for constructing arrays
//!
//! # Basic Usage
//!
@@ -81,7 +81,9 @@
//! # Custom Builders
//!
//! It is common to have a collection of statically defined Rust types that
-//! you want to convert to Arrow arrays. An example of doing so is below
+//! you want to convert to Arrow arrays.
+//!
+//! An example of doing so is below
//!
//! ```
//! # use std::any::Any;
@@ -261,26 +263,20 @@ pub trait ArrayBuilder: Any + Send {
fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
}
-/// Builder for [`ListArray`]s (i32 offsets)
-///
-/// See [`GenericListBuilder`] for usage examples
-///
-/// [`ListArray`]: crate::array::ListArray
+/// Builder for [`ListArray`](crate::array::ListArray)
pub type ListBuilder<T> = GenericListBuilder<i32, T>;
-/// Builder for [`LargeListArray`]s (i64 offsets)
-///
-/// See [`GenericListBuilder`] for usage examples
-///
-/// [`LargeListArray`]: crate::array::LargeListArray
+/// Builder for [`LargeListArray`](crate::array::LargeListArray)
pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
-/// A binary array builder with i32 offsets
+/// Builder for [`BinaryArray`](crate::array::BinaryArray)
pub type BinaryBuilder = GenericBinaryBuilder<i32>;
-/// A binary array builder with i64 offsets
+
+/// Builder for [`LargeBinaryArray`](crate::array::LargeBinaryArray)
pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
-/// A string array builder with i32 offsets
+/// Builder for [`StringArray`](crate::array::StringArray)
pub type StringBuilder = GenericStringBuilder<i32>;
-/// A string array builder with i64 offsets
+
+/// Builder for [`LargeStringArray`](crate::array::LargeStringArray)
pub type LargeStringBuilder = GenericStringBuilder<i64>;
diff --git a/arrow-array/src/builder/primitive_builder.rs
b/arrow-array/src/builder/primitive_builder.rs
index 8721004d2..440fb8a4b 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -92,7 +92,7 @@ pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// A decimal 256 array builder
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
-/// Array builder for fixed-width primitive types
+/// Builder for [`PrimitiveArray`]
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
values_builder: BufferBuilder<T::Native>,
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 41880d3a4..cde1abe22 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -45,9 +45,7 @@ impl<T: ToByteSlice> PartialEq for Value<T> {
impl<T: ToByteSlice> Eq for Value<T> {}
-/// Array builder for `DictionaryArray`. For example to map a set of byte
indices
-/// to f32 values. Note that the use of a `HashMap` here will not scale to
very large
-/// arrays or result in an ordered dictionary.
+/// Builder for [`DictionaryArray`] of
[`PrimitiveArray`](crate::array::PrimitiveArray)
///
/// # Example:
///
diff --git a/arrow-array/src/builder/primitive_run_builder.rs
b/arrow-array/src/builder/primitive_run_builder.rs
index 30750b6f3..53674a73b 100644
--- a/arrow-array/src/builder/primitive_run_builder.rs
+++ b/arrow-array/src/builder/primitive_run_builder.rs
@@ -23,7 +23,7 @@ use super::{ArrayBuilder, PrimitiveBuilder};
use arrow_buffer::ArrowNativeType;
-/// Array builder for [`RunArray`] that encodes primitive values.
+/// Builder for [`RunArray`] of
[`PrimitiveArray`](crate::array::PrimitiveArray)
///
/// # Example:
///
diff --git a/arrow-array/src/builder/struct_builder.rs
b/arrow-array/src/builder/struct_builder.rs
index 4702bb734..41ede9c7a 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -24,7 +24,7 @@ use arrow_schema::{DataType, Fields, IntervalUnit, TimeUnit};
use std::any::Any;
use std::sync::Arc;
-/// Array builder for Struct types.
+/// Builder for [`StructArray`]
///
/// Note that callers should make sure that methods of all the child field
builders are
/// properly called to maintain the consistency of the data structure.
diff --git a/arrow-array/src/builder/union_builder.rs
b/arrow-array/src/builder/union_builder.rs
index 8ca303da8..6461a56aa 100644
--- a/arrow-array/src/builder/union_builder.rs
+++ b/arrow-array/src/builder/union_builder.rs
@@ -99,7 +99,7 @@ impl FieldData {
}
}
-/// Builder type for creating a new `UnionArray`.
+/// Builder for [`UnionArray`]
///
/// Example: **Dense Memory Layout**
///
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index ff1ddb1f6..6ee9f7f1d 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -19,42 +19,6 @@
//! all having the same type. This crate provides concrete implementations of
each type, as
//! well as an [`Array`] trait that can be used for type-erasure.
//!
-//! # Downcasting an Array
-//!
-//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or
[`ArrayRef`].
-//! For example, [`RecordBatch`](`crate::RecordBatch`) stores columns as
[`ArrayRef`].
-//!
-//! Whilst these arrays can be passed directly to the [`compute`], [`csv`],
[`json`], etc... APIs,
-//! it is often the case that you wish to interact with the data directly.
-//!
-//! This requires downcasting to the concrete type of the array:
-//!
-//! ```
-//! # use arrow_array::{Array, Float32Array, Int32Array};
-//!
-//! fn sum_int32(array: &dyn Array) -> i32 {
-//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap();
-//! integers.iter().map(|val| val.unwrap_or_default()).sum()
-//! }
-//!
-//! // Note: the values for positions corresponding to nulls will be arbitrary
-//! fn as_f32_slice(array: &dyn Array) -> &[f32] {
-//! array.as_any().downcast_ref::<Float32Array>().unwrap().values()
-//! }
-//! ```
-//!
-//! The [`cast::AsArray`] extension trait can make this more ergonomic
-//!
-//! ```
-//! # use arrow_array::Array;
-//! # use arrow_array::cast::{AsArray, as_primitive_array};
-//! # use arrow_array::types::Float32Type;
-//!
-//! fn as_f32_slice(array: &dyn Array) -> &[f32] {
-//! array.as_primitive::<Float32Type>().values()
-//! }
-//! ```
-
//! # Building an Array
//!
//! Most [`Array`] implementations can be constructed directly from iterators
or [`Vec`]
@@ -62,7 +26,7 @@
//! ```
//! # use arrow_array::{Int32Array, ListArray, StringArray};
//! # use arrow_array::types::Int32Type;
-//!
+//! #
//! Int32Array::from(vec![1, 2]);
//! Int32Array::from(vec![Some(1), None]);
//! Int32Array::from_iter([1, 2, 3, 4]);
@@ -91,30 +55,59 @@
//!
//! // Append a single primitive value
//! builder.append_value(1);
-//!
//! // Append a null value
//! builder.append_null();
-//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]);
//!
//! // Build the array
//! let array = builder.finish();
//!
-//! assert_eq!(
-//! 5,
-//! array.len(),
-//! "The array has 5 values, counting the null value"
-//! );
+//! assert_eq!(5, array.len());
+//! assert_eq!(2, array.value(2));
+//! assert_eq!(&array.values()[3..5], &[3, 4])
+//! ```
//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
+//! # Low-level API
+//!
+//! Internally, arrays consist of one or more shared memory regions backed by
a [`Buffer`],
+//! the number and meaning of which depend on the array’s data type, as
documented in
+//! the [Arrow specification].
+//!
+//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * A contiguous [`ScalarBuffer<i16>`] of values
+//!
+//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * An offsets [`OffsetBuffer<i32>`] identifying valid UTF-8 sequences
within the values buffer
+//! * A values [`Buffer`] of UTF-8 encoded string data
+//!
+//! Array constructors such as [`PrimitiveArray::try_new`] provide the ability
to cheaply
+//! construct an array from these parts, with functions such as
[`PrimitiveArray::into_parts`]
+//! providing the reverse operation.
//!
-//! assert_eq!(
-//! &array.values()[3..5],
-//! &[3, 4],
-//! "Get slice of len 2 starting at idx 3"
-//! )
//! ```
+//! # use arrow_array::{Array, Int32Array, StringArray};
+//! # use arrow_buffer::OffsetBuffer;
+//! #
+//! // Create an Int32Array from Vec without copying
+//! let array = Int32Array::new(vec![1, 2, 3].into(), None);
+//! assert_eq!(array.values(), &[1, 2, 3]);
+//! assert_eq!(array.null_count(), 0);
+//!
+//! // Create a StringArray from parts
+//! let offsets = OffsetBuffer::new(vec![0, 5, 10].into());
+//! let array = StringArray::new(offsets, b"helloworld".into(), None);
+//! let values: Vec<_> = array.iter().map(|x| x.unwrap()).collect();
+//! assert_eq!(values, &["hello", "world"]);
+//! ```
+//!
+//! As [`Buffer`], and its derivatives, can be created from [`Vec`] without
copying, this provides
+//! an efficient way to not only interoperate with other Rust code, but also
implement kernels
+//! optimised for the arrow data layout - e.g. by handling buffers instead of
values.
//!
//! # Zero-Copy Slicing
//!
@@ -122,32 +115,57 @@
//! data. Internally this just increments some ref-counts, and so is
incredibly cheap
//!
//! ```rust
-//! # use std::sync::Arc;
-//! # use arrow_array::{ArrayRef, Int32Array};
-//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef;
+//! # use arrow_array::Int32Array;
+//! let array = Int32Array::from_iter([1, 2, 3]);
//!
//! // Slice with offset 1 and length 2
//! let sliced = array.slice(1, 2);
-//! let ints = sliced.as_any().downcast_ref::<Int32Array>().unwrap();
-//! assert_eq!(ints.values(), &[2, 3]);
+//! assert_eq!(sliced.values(), &[2, 3]);
//! ```
//!
-//! # Internal Representation
+//! # Downcasting an Array
//!
-//! Internally, arrays are represented by one or several [`Buffer`], the
number and meaning of
-//! which depend on the array’s data type, as documented in the [Arrow
specification].
+//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or
[`ArrayRef`].
+//! For example, [`RecordBatch`](`crate::RecordBatch`) stores columns as
[`ArrayRef`].
//!
-//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//! Whilst these arrays can be passed directly to the [`compute`], [`csv`],
[`json`], etc... APIs,
+//! it is often the case that you wish to interact with the concrete arrays
directly.
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * A contiguous [`Buffer`] of 16-bit integers
+//! This requires downcasting to the concrete type of the array:
//!
-//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//! ```
+//! # use arrow_array::{Array, Float32Array, Int32Array};
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8
sequences within the values buffer
-//! * A values [`Buffer`] of UTF-8 encoded string data
+//! // Safely downcast an `Array` to an `Int32Array` and compute the sum
+//! // using native i32 values
+//! fn sum_int32(array: &dyn Array) -> i32 {
+//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap();
+//! integers.iter().map(|val| val.unwrap_or_default()).sum()
+//! }
//!
+//! // Safely downcasts the array to a `Float32Array` and returns a &[f32]
view of the data
+//! // Note: the values for positions corresponding to nulls will be arbitrary
(but still valid f32)
+//! fn as_f32_slice(array: &dyn Array) -> &[f32] {
+//! array.as_any().downcast_ref::<Float32Array>().unwrap().values()
+//! }
+//! ```
+//!
+//! The [`cast::AsArray`] extension trait can make this more ergonomic
+//!
+//! ```
+//! # use arrow_array::Array;
+//! # use arrow_array::cast::{AsArray, as_primitive_array};
+//! # use arrow_array::types::Float32Type;
+//!
+//! fn as_f32_slice(array: &dyn Array) -> &[f32] {
+//! array.as_primitive::<Float32Type>().values()
+//! }
+//! ```
+//!
+//! [`ScalarBuffer<T>`]: arrow_buffer::ScalarBuffer
+//! [`ScalarBuffer<i16>`]: arrow_buffer::ScalarBuffer
+//! [`OffsetBuffer<i32>`]: arrow_buffer::OffsetBuffer
+//! [`NullBuffer`]: arrow_buffer::NullBuffer
//! [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html
//! [`&dyn Array`]: Array
//! [`NullBuffer`]: arrow_buffer::NullBuffer
diff --git a/arrow-buffer/src/alloc/mod.rs b/arrow-buffer/src/alloc/mod.rs
index d1236eeaa..a3cb6253f 100644
--- a/arrow-buffer/src/alloc/mod.rs
+++ b/arrow-buffer/src/alloc/mod.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! Defines memory-related functions, such as allocate/deallocate/reallocate
memory
-//! regions, cache and allocation alignments.
+//! Defines the low-level [`Allocation`] API for shared memory regions
use std::alloc::Layout;
use std::fmt::{Debug, Formatter};
diff --git a/arrow-buffer/src/buffer/mod.rs b/arrow-buffer/src/buffer/mod.rs
index ed53d3361..d33e68795 100644
--- a/arrow-buffer/src/buffer/mod.rs
+++ b/arrow-buffer/src/buffer/mod.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! This module contains two main structs: [Buffer] and [MutableBuffer]. A
buffer represents
-//! a contiguous memory region that can be shared via `offsets`.
+//! Types of shared memory region
mod offset;
pub use offset::*;
diff --git a/arrow-buffer/src/buffer/mutable.rs
b/arrow-buffer/src/buffer/mutable.rs
index 9a905a322..43c1cd004 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -36,7 +36,7 @@ use super::Buffer;
/// Use [MutableBuffer::push] to insert an item,
[MutableBuffer::extend_from_slice]
/// to insert many items, and `into` to convert it to [`Buffer`].
///
-/// For a safe, strongly typed API consider using `Vec`
+/// For a safe, strongly typed API consider using [`Vec`] and
[`ScalarBuffer`](crate::ScalarBuffer)
///
/// Note: this may be deprecated in a future release
([#1176](https://github.com/apache/arrow-rs/issues/1176))
///
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index cdb0c2aeb..60987be6e 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -19,6 +19,13 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator,
BitSliceIterator};
use crate::buffer::BooleanBuffer;
use crate::{Buffer, MutableBuffer};
+/// A [`BooleanBuffer`] used to encode validity for arrow arrays
+///
+/// As per the [Arrow specification], array validity is encoded in a packed
bitmask with a
+/// `true` value indicating the corresponding slot is not null, and `false`
indicating
+/// that it is null.
+///
+/// [Arrow specification]:
https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NullBuffer {
buffer: BooleanBuffer,
diff --git a/arrow-buffer/src/buffer/scalar.rs
b/arrow-buffer/src/buffer/scalar.rs
index 1a4680111..40b24e4eb 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -22,12 +22,24 @@ use std::fmt::Formatter;
use std::marker::PhantomData;
use std::ops::Deref;
-/// Provides a safe API for interpreting a [`Buffer`] as a slice of
[`ArrowNativeType`]
+/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
///
-/// # Safety
+/// The easiest way to think about `ScalarBuffer<T>` is as being equivalent to an
`Arc<Vec<T>>`,
+/// with the following differences:
///
-/// All [`ArrowNativeType`] are valid for all possible backing byte
representations, and as
-/// a result they are "trivially safely transmutable".
+/// - slicing and cloning are O(1).
+/// - it supports externally allocated memory
+///
+/// ```
+/// # use arrow_buffer::ScalarBuffer;
+/// // Zero-copy conversion from Vec
+/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
+/// assert_eq!(&buffer, &[1, 2, 3]);
+///
+/// // Zero-copy slicing
+/// let sliced = buffer.slice(1, 2);
+/// assert_eq!(&sliced, &[2, 3]);
+/// ```
#[derive(Clone)]
pub struct ScalarBuffer<T: ArrowNativeType> {
/// Underlying data buffer
diff --git a/arrow-buffer/src/util/bit_iterator.rs
b/arrow-buffer/src/util/bit_iterator.rs
index 1a8dd9226..4e24ccdab 100644
--- a/arrow-buffer/src/util/bit_iterator.rs
+++ b/arrow-buffer/src/util/bit_iterator.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Types for iterating over packed bitmasks
+
use crate::bit_chunk_iterator::{UnalignedBitChunk, UnalignedBitChunkIterator};
use crate::bit_util::{ceil, get_bit_raw};
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 27c905ba0..af5972acc 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -92,7 +92,7 @@
//! assert_eq!(sum(&TimestampNanosecondArray::from(vec![1, 2, 3])), 6);
//! ```
//!
-//! And the following is generic over all arrays with comparable values
+//! And the following is generic over all arrays with comparable values:
//!
//! ```rust
//! # use arrow::array::{ArrayAccessor, ArrayIter, Int32Array, StringArray};
@@ -109,7 +109,7 @@
//! assert_eq!(min(&StringArray::from(vec!["b", "a", "c"])), Some("a"));
//! ```
//!
-//! For more examples, consult the [arrow_array] docs.
+//! For more examples and details, consult the [arrow_array] docs.
//!
//! # Type Erasure / Trait Objects
//!
@@ -317,19 +317,6 @@
//! assert_eq!(string.value(1), "foo");
//! ```
//!
-//! # Memory and Buffers
-//!
-//! Advanced users may wish to interact with the underlying buffers of an
[`Array`], for example,
-//! for FFI or high-performance conversion from other formats. This interface
is provided by
-//! [`ArrayData`] which stores the [`Buffer`] comprising an [`Array`], and can
be accessed
-//! with [`Array::to_data`](array::Array::to_data)
-//!
-//! The APIs for constructing [`ArrayData`] come in safe, and unsafe variants,
with the former
-//! performing extensive, but potentially expensive validation to ensure the
buffers are well-formed.
-//!
-//! An [`ArrayRef`] can be cheaply created from an [`ArrayData`] using
[`make_array`],
-//! or by using the appropriate [`From`] conversion on the concrete [`Array`]
implementation.
-//!
//! # Safety and Security
//!
//! Like many crates, this crate makes use of unsafe where prudent. However,
it endeavours to be